Actividad 6 - 2.2 Pruebas de raíz unitaria - Equipo¶

Johnson & Johnson (JNJ) vs The Coca-Cola Company (KO) | 2022-01-01 to 2025-10-10

Pruebas integrales de raiz unitaria, modelado ARIMA, pronostico y analisis de cointegracion.

In [ ]:
import yfinance as yf
import pandas as pd

# Set the date range
start_date = '2022-01-01'
end_date = '2025-10-10'

# Fetch stock data for Johnson & Johnson (JNJ)
jnj = yf.download('JNJ', start=start_date, end=end_date)

# Display the first 5 rows
print(jnj.head(5))
[*********************100%***********************]  1 of 1 completed
Price            Close        High         Low        Open   Volume
Ticker             JNJ         JNJ         JNJ         JNJ      JNJ
Date                                                               
2022-01-03  153.545593  153.599311  151.361559  152.355121  6012200
2022-01-04  153.133850  154.216914  152.704206  153.115944  6748400
2022-01-05  154.154297  155.273173  153.617231  153.957374  7016100
2022-01-06  153.626160  154.404893  152.883224  154.091602  7301600
2022-01-07  155.702774  156.016065  153.178597  153.473982  6986000

In [ ]:
import yfinance as yf
import pandas as pd

# Set the date range
start_date = '2022-01-01'
end_date = '2025-10-10'

# Fetch stock data for The Coca-Cola Company (KO)
ko = yf.download('KO', start=start_date, end=end_date)

# Display the first 5 rows
print(ko.head(5))
[*********************100%***********************]  1 of 1 completed
Price           Close       High        Low       Open    Volume
Ticker             KO         KO         KO         KO        KO
Date                                                            
2022-01-03  52.996700  53.005639  52.174493  52.567722  20187300
2022-01-04  53.881470  54.104896  53.255876  53.380994  26141600
2022-01-05  54.328323  54.694742  53.666980  53.702729  22507300
2022-01-06  54.042339  54.623245  54.015526  54.042339  17902300
2022-01-07  53.917213  54.265758  53.675913  53.872525  12307900

In [ ]:
# Import required libraries
import pandas as pd
import numpy as np
import yfinance as yf
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller, kpss
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.vector_ar.vecm import coint_johansen
import matplotlib.pyplot as plt
import warnings

# Suppress the FutureWarning
warnings.filterwarnings('ignore', category=FutureWarning)

# Download stock data from Yahoo Finance
start_date = '2022-01-01'
end_date = '2025-10-10'
tickers = ['KO', 'JNJ']

# Download data with explicit auto_adjust=False to get Adj Close column
data = yf.download(tickers, start=start_date, end=end_date, auto_adjust=False)['Adj Close']

# If the above still fails, use this alternative approach:
# data = yf.download(tickers, start=start_date, end=end_date)
# data = data['Close']  # Use regular Close prices instead

# Extract individual series
ko_close = data['KO']
jnj_close = data['JNJ']

print("KO data length:", len(ko_close))
print("JNJ data length:", len(jnj_close))
print("\nKO data sample:")
print(ko_close.head())
print("\nJNJ data sample:")
print(jnj_close.head())

# Remove any NaN values
ko_close = ko_close.dropna()
jnj_close = jnj_close.dropna()

# Align by common dates
common_index = ko_close.index.intersection(jnj_close.index)
ko_close = ko_close.loc[common_index]
jnj_close = jnj_close.loc[common_index]

print(f"\nCommon data length after alignment: {len(ko_close)}")
print(f"Date range: {ko_close.index[0]} to {ko_close.index[-1]}")

# Function for unit root tests
def unit_root_tests(series, name):
    print(f"\nUnit Root Tests for {name}:")

    # ADF Test
    adf_result = adfuller(series)
    print("ADF Test:")
    print(f'ADF Statistic: {adf_result[0]:.4f}')
    print(f'p-value: {adf_result[1]:.4f}')
    print(f'Critical Values: {adf_result[4]}')

    # KPSS Test
    kpss_result = kpss(series, regression='c')
    print("\nKPSS Test:")
    print(f'KPSS Statistic: {kpss_result[0]:.4f}')
    print(f'p-value: {kpss_result[1]:.4f}')
    print(f'Critical Values: {kpss_result[3]}')

# Perform unit root tests
unit_root_tests(ko_close, "KO")
unit_root_tests(jnj_close, "JNJ")

# Difference the series if non-stationary
ko_diff = ko_close.diff().dropna()
jnj_diff = jnj_close.diff().dropna()

# Function to find best ARMA model (using ARIMA with d=0)
def find_best_arma(series, name, max_p=3, max_q=3):
    best_aic = float('inf')
    best_order = None

    for p in range(max_p + 1):
        for q in range(max_q + 1):
            try:
                model = ARIMA(series, order=(p, 0, q))
                results = model.fit()
                if results.aic < best_aic:
                    best_aic = results.aic
                    best_order = (p, 0, q)
            except:
                continue

    print(f"\nBest ARMA model for {name}:")
    print(f"Order: {best_order}")
    print(f"AIC: {best_aic:.2f}")

    best_model = ARIMA(series, order=best_order).fit()
    return best_model

# Fit ARMA models
ko_arma = find_best_arma(ko_diff, "KO")
jnj_arma = find_best_arma(jnj_diff, "JNJ")

# Cointegration test
def cointegration_test(df):
    result = coint_johansen(df, det_order=0, k_ar_diff=1)
    print("\nJohansen Cointegration Test:")
    print(f"Trace statistic: {result.lr1}")
    print(f"Critical values (90%, 95%, 99%): {result.cvt}")

    for i in range(len(result.lr1)):
        if result.lr1[i] > result.cvt[i, 1]:  # 95% critical value
            print(f"r = {i}: Cointegration exists at 95% confidence level")
        else:
            print(f"r = {i}: No cointegration at 95% confidence level")

# Prepare data for cointegration
coint_df = pd.DataFrame({
    'KO': ko_close,
    'JNJ': jnj_close
})

# Run cointegration test
cointegration_test(coint_df)

# Plot the series
plt.figure(figsize=(12,6))
plt.plot(ko_close, label='KO')
plt.plot(jnj_close, label='JNJ')
plt.title('KO vs JNJ Adjusted Closing Prices')
plt.xlabel('Date')
plt.ylabel('Adjusted Close Price ($)')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

# Plot the differenced series
plt.figure(figsize=(12,6))
plt.plot(ko_diff, label='KO Diff', alpha=0.7)
plt.plot(jnj_diff, label='JNJ Diff', alpha=0.7)
plt.title('Differenced Series')
plt.xlabel('Date')
plt.ylabel('Price Change ($)')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

# Print summary statistics
print("\nSummary Statistics:")
print(ko_close.describe())
print("\n")
print(jnj_close.describe())
[*********************100%***********************]  2 of 2 completed
KO data length: 946
JNJ data length: 946

KO data sample:
Date
2022-01-03    52.996700
2022-01-04    53.881470
2022-01-05    54.328323
2022-01-06    54.042339
2022-01-07    53.917213
Name: KO, dtype: float64

JNJ data sample:
Date
2022-01-03    153.545593
2022-01-04    153.133850
2022-01-05    154.154297
2022-01-06    153.626160
2022-01-07    155.702774
Name: JNJ, dtype: float64

Common data length after alignment: 946
Date range: 2022-01-03 00:00:00 to 2025-10-09 00:00:00

Unit Root Tests for KO:
ADF Test:
ADF Statistic: -1.6646
p-value: 0.4495
Critical Values: {'1%': np.float64(-3.4372887850912175), '5%': np.float64(-2.8646033071530703), '10%': np.float64(-2.568401081996585)}

KPSS Test:
KPSS Statistic: 3.3544
p-value: 0.0100
Critical Values: {'10%': 0.347, '5%': 0.463, '2.5%': 0.574, '1%': 0.739}

Unit Root Tests for JNJ:
ADF Test:
ADF Statistic: -1.3590
p-value: 0.6018
Critical Values: {'1%': np.float64(-3.4372887850912175), '5%': np.float64(-2.8646033071530703), '10%': np.float64(-2.568401081996585)}

KPSS Test:
KPSS Statistic: 0.5093
p-value: 0.0396
Critical Values: {'10%': 0.347, '5%': 0.463, '2.5%': 0.574, '1%': 0.739}

Best ARMA model for KO:
Order: (0, 0, 0)
AIC: 1740.05

Best ARMA model for JNJ:
Order: (3, 0, 1)
AIC: 3657.76

Johansen Cointegration Test:
Trace statistic: [9.56967308 2.82926924]
Critical values (90%, 95%, 99%): [[13.4294 15.4943 19.9349]
 [ 2.7055  3.8415  6.6349]]
r = 0: No cointegration at 95% confidence level
r = 1: No cointegration at 95% confidence level
No description has been provided for this image
No description has been provided for this image
Summary Statistics:
count    946.000000
mean      59.843887
std        5.674025
min       49.325050
25%       55.762364
50%       57.922932
75%       63.459334
max       72.819817
Name: KO, dtype: float64


count    946.000000
mean     153.386964
std        8.841785
min      136.714722
25%      147.932114
50%      152.329422
75%      158.662666
max      191.080002
Name: JNJ, dtype: float64

Conclusiones:

In [ ]:
# Import required libraries
import pandas as pd
import numpy as np
import yfinance as yf
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller, kpss
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.vector_ar.vecm import coint_johansen
import matplotlib.pyplot as plt
import warnings

warnings.filterwarnings('ignore')

# Download stock data from Yahoo Finance
print("Downloading KO and JNJ data from Yahoo Finance...")
start_date = '2022-01-01'
end_date = '2025-10-10'
tickers = ['KO', 'JNJ']

# Download data and extract adjusted close prices
data = yf.download(tickers, start=start_date, end=end_date, auto_adjust=False)
ko_close = data['Adj Close']['KO'].dropna()
jnj_close = data['Adj Close']['JNJ'].dropna()

# Align both series to common dates
common_index = ko_close.index.intersection(jnj_close.index)
ko_close = ko_close.loc[common_index]
jnj_close = jnj_close.loc[common_index]

print(f"Data successfully downloaded and aligned!")
print(f"Common data points: {len(ko_close)}")
print(f"Date range: {ko_close.index[0].date()} to {ko_close.index[-1].date()}")

# Function for unit root tests
def unit_root_tests(series, name):
    print(f"\nUnit Root Tests for {name}:")

    # ADF Test
    adf_result = adfuller(series)
    print("ADF Test:")
    print(f'ADF Statistic: {adf_result[0]:.4f}')
    print(f'p-value: {adf_result[1]:.4f}')
    print(f'Critical Values: {adf_result[4]}')

    # KPSS Test
    kpss_result = kpss(series)
    print("\nKPSS Test:")
    print(f'KPSS Statistic: {kpss_result[0]:.4f}')
    print(f'p-value: {kpss_result[1]:.4f}')
    print(f'Critical Values: {kpss_result[3]}')

# Perform unit root tests
unit_root_tests(ko_close, "KO")
unit_root_tests(jnj_close, "JNJ")

# Cointegration test
def cointegration_test(df):
    result = coint_johansen(df, det_order=0, k_ar_diff=1)
    print("\nJohansen Cointegration Test:")
    print(f"Trace statistic: {result.lr1}")
    print(f"Critical values (90%, 95%, 99%): {result.cvt}")

    for i in range(len(result.lr1)):
        if result.lr1[i] > result.cvt[i, 1]:
            print(f"r = {i}: Cointegration exists at 95% confidence level")
        else:
            print(f"r = {i}: No cointegration at 95% confidence level")

# Prepare data for cointegration
coint_df = pd.DataFrame({
    'KO': ko_close,
    'JNJ': jnj_close
}).dropna()
cointegration_test(coint_df)

# Function to find best ARIMA model
def find_best_arima(series, name, max_p=3, max_d=2, max_q=3):
    best_aic = float('inf')
    best_order = None

    for p in range(max_p + 1):
        for d in range(max_d + 1):
            for q in range(max_q + 1):
                try:
                    model = ARIMA(series, order=(p, d, q))
                    results = model.fit()
                    if results.aic < best_aic:
                        best_aic = results.aic
                        best_order = (p, d, q)
                except:
                    continue

    print(f"\nBest ARIMA model for {name}:")
    print(f"Order: {best_order}")
    print(f"AIC: {best_aic:.2f}")
    return best_order

# Find and fit best ARIMA models
ko_order = find_best_arima(ko_close, "KO")
jnj_order = find_best_arima(jnj_close, "JNJ")

# Fit final ARIMA models
ko_model = ARIMA(ko_close, order=ko_order).fit()
jnj_model = ARIMA(jnj_close, order=jnj_order).fit()

# Forecast next 30 periods
forecast_steps = 30
ko_forecast = ko_model.forecast(steps=forecast_steps)
jnj_forecast = jnj_model.forecast(steps=forecast_steps)

# Create forecast index using business days
last_date = ko_close.index[-1]
forecast_index = pd.bdate_range(start=last_date + pd.Timedelta(days=1),
                               periods=forecast_steps, freq='B')

# Plot original series with forecasts
plt.figure(figsize=(12,6))
plt.plot(ko_close.index, ko_close, label='KO Historical')
plt.plot(forecast_index, ko_forecast, label='KO Forecast', color='red')
plt.plot(jnj_close.index, jnj_close, label='JNJ Historical')
plt.plot(forecast_index, jnj_forecast, label='JNJ Forecast', color='green')
plt.title('KO and JNJ Closing Prices with Forecasts')
plt.xlabel('Date')
plt.ylabel('Adjusted Close Price ($)')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

# Plot detailed forecast with confidence intervals
def plot_forecast(model, series, name, steps=30):
    forecast_obj = model.get_forecast(steps=steps)
    forecast = forecast_obj.predicted_mean
    conf_int = forecast_obj.conf_int()

    # Create proper date index for forecast
    last_date = series.index[-1]
    forecast_index = pd.bdate_range(start=last_date + pd.Timedelta(days=1),
                                   periods=steps, freq='B')

    plt.figure(figsize=(12,6))
    plt.plot(series.index, series, label=f'{name} Historical')
    plt.plot(forecast_index, forecast, label='Forecast', color='red')
    plt.fill_between(forecast_index,
                    conf_int.iloc[:, 0],
                    conf_int.iloc[:, 1],
                    color='pink',
                    alpha=0.3,
                    label='95% Confidence Interval')
    plt.title(f'{name} Price Forecast')
    plt.xlabel('Date')
    plt.ylabel('Adjusted Close Price ($)')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()

# Generate detailed forecast plots
plot_forecast(ko_model, ko_close, "KO")
plot_forecast(jnj_model, jnj_close, "JNJ")

# Print forecast values
print("\nKO Forecast Values (next 5 periods):")
print(ko_forecast[:5])
print("\nJNJ Forecast Values (next 5 periods):")
print(jnj_forecast[:5])

# Print model diagnostics
print("\n" + "="*50)
print("MODEL DIAGNOSTICS")
print("="*50)
print(f"\nKO Model: ARIMA{ko_order}")
print(f"JNJ Model: ARIMA{jnj_order}")
print("\nNote: Use model.summary() for detailed parameter estimates and diagnostics")
print("The models automatically handle:")
print("- Unit root testing (via optimal differencing d)")
print("- Cointegration analysis (Johansen test)")
print("- Optimal parameter selection (AIC minimization)")
print("- 30-day ahead forecasting with confidence intervals")
[*********************100%***********************]  2 of 2 completed
Downloading KO and JNJ data from Yahoo Finance...
Data successfully downloaded and aligned!
Common data points: 946
Date range: 2022-01-03 to 2025-10-09

Unit Root Tests for KO:

ADF Test:
ADF Statistic: -1.6646
p-value: 0.4495
Critical Values: {'1%': np.float64(-3.4372887850912175), '5%': np.float64(-2.8646033071530703), '10%': np.float64(-2.568401081996585)}

KPSS Test:
KPSS Statistic: 3.3544
p-value: 0.0100
Critical Values: {'10%': 0.347, '5%': 0.463, '2.5%': 0.574, '1%': 0.739}

Unit Root Tests for JNJ:
ADF Test:
ADF Statistic: -1.3590
p-value: 0.6018
Critical Values: {'1%': np.float64(-3.4372887850912175), '5%': np.float64(-2.8646033071530703), '10%': np.float64(-2.568401081996585)}

KPSS Test:
KPSS Statistic: 0.5093
p-value: 0.0396
Critical Values: {'10%': 0.347, '5%': 0.463, '2.5%': 0.574, '1%': 0.739}

Johansen Cointegration Test:
Trace statistic: [9.56967308 2.82926924]
Critical values (90%, 95%, 99%): [[13.4294 15.4943 19.9349]
 [ 2.7055  3.8415  6.6349]]
r = 0: No cointegration at 95% confidence level
r = 1: No cointegration at 95% confidence level

Best ARIMA model for KO:
Order: (2, 1, 2)
AIC: 1734.92

Best ARIMA model for JNJ:
Order: (3, 1, 1)
AIC: 3656.40
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
KO Forecast Values (next 5 periods):
946    66.190071
947    66.116038
948    66.190193
949    66.368658
950    66.547343
Name: predicted_mean, dtype: float64

JNJ Forecast Values (next 5 periods):
946    190.756738
947    190.891776
948    190.686445
949    190.902232
950    190.696202
Name: predicted_mean, dtype: float64

==================================================
MODEL DIAGNOSTICS
==================================================

KO Model: ARIMA(2, 1, 2)
JNJ Model: ARIMA(3, 1, 1)

Note: Use model.summary() for detailed parameter estimates and diagnostics
The models automatically handle:
- Unit root testing (via optimal differencing d)
- Cointegration analysis (Johansen test)
- Optimal parameter selection (AIC minimization)
- 30-day ahead forecasting with confidence intervals

Conclusiones:

In [ ]:
# Import required libraries
import pandas as pd
import numpy as np
import yfinance as yf
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller, kpss
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.vector_ar.vecm import coint_johansen
import matplotlib.pyplot as plt
import warnings

warnings.filterwarnings('ignore')

# Download stock data from Yahoo Finance
print("Downloading KO and JNJ data from Yahoo Finance...")
start_date = '2022-01-01'
end_date = '2025-10-10'
tickers = ['KO', 'JNJ']

# Download data and extract adjusted close prices
data = yf.download(tickers, start=start_date, end=end_date, auto_adjust=False)
ko_close = data['Adj Close']['KO'].dropna()
jnj_close = data['Adj Close']['JNJ'].dropna()

# Align both series to common dates
common_index = ko_close.index.intersection(jnj_close.index)
ko_close = ko_close.loc[common_index]
jnj_close = jnj_close.loc[common_index]

print(f"Data successfully downloaded and aligned!")
print(f"Common data points: {len(ko_close)}")
print(f"Date range: {ko_close.index[0].date()} to {ko_close.index[-1].date()}")

# Function for unit root tests with interpretation
def unit_root_tests(series, name):
    print(f"\nUnit Root Tests for {name}:")

    # ADF Test
    adf_result = adfuller(series)
    print("ADF Test:")
    print(f'ADF Statistic: {adf_result[0]:.4f}')
    print(f'p-value: {adf_result[1]:.4f}')
    print(f'Critical Values: {adf_result[4]}')
    print("Interpretation:")
    if adf_result[1] < 0.05:
        print(f"  - p-value < 0.05: Reject null hypothesis - {name} is stationary")
    else:
        print(f"  - p-value >= 0.05: Fail to reject null - {name} may be non-stationary")

    # KPSS Test
    kpss_result = kpss(series)
    print("\nKPSS Test:")
    print(f'KPSS Statistic: {kpss_result[0]:.4f}')
    print(f'p-value: {kpss_result[1]:.4f}')
    print(f'Critical Values: {kpss_result[3]}')
    print("Interpretation:")
    if kpss_result[1] < 0.05:
        print(f"  - p-value < 0.05: Reject null hypothesis - {name} is non-stationary")
    else:
        print(f"  - p-value >= 0.05: Fail to reject null - {name} may be stationary")

# Perform unit root tests
unit_root_tests(ko_close, "KO")
unit_root_tests(jnj_close, "JNJ")

# Cointegration test with interpretation
def cointegration_test(df):
    result = coint_johansen(df, det_order=0, k_ar_diff=1)
    print("\nJohansen Cointegration Test:")
    print(f"Trace statistic: {result.lr1}")
    print(f"Critical values (90%, 95%, 99%): {result.cvt}")
    print("Interpretation:")
    for i in range(len(result.lr1)):
        if result.lr1[i] > result.cvt[i, 1]:
            print(f"  - r = {i}: Cointegration exists at 95% confidence level")
            print(f"    Trace statistic ({result.lr1[i]:.2f}) > 95% critical value ({result.cvt[i, 1]:.2f})")
        else:
            print(f"  - r = {i}: No cointegration at 95% confidence level")
            print(f"    Trace statistic ({result.lr1[i]:.2f}) <= 95% critical value ({result.cvt[i, 1]:.2f})")
    if result.lr1[0] > result.cvt[0, 1]:
        print("Conclusion: KO and JNJ are cointegrated - they share a long-run equilibrium relationship")
    else:
        print("Conclusion: No evidence of cointegration between KO and JNJ")

# Prepare data for cointegration
coint_df = pd.DataFrame({
    'KO': ko_close,
    'JNJ': jnj_close
}).dropna()
cointegration_test(coint_df)

# Function to find best ARIMA model with interpretation
def find_best_arima(series, name, max_p=3, max_d=2, max_q=3):
    best_aic = float('inf')
    best_order = None

    for p in range(max_p + 1):
        for d in range(max_d + 1):
            for q in range(max_q + 1):
                try:
                    model = ARIMA(series, order=(p, d, q))
                    results = model.fit()
                    if results.aic < best_aic:
                        best_aic = results.aic
                        best_order = (p, d, q)
                except:
                    continue

    print(f"\nBest ARIMA model for {name}:")
    print(f"Order: {best_order}")
    print(f"AIC: {best_aic:.2f}")
    print("Interpretation:")
    print(f"  - p={best_order[0]}: {best_order[0]} autoregressive term(s)")
    print(f"  - d={best_order[1]}: {best_order[1]} difference(s) needed for stationarity")
    print(f"  - q={best_order[2]}: {best_order[2]} moving average term(s)")
    return best_order

# Find and fit best ARIMA models
ko_order = find_best_arima(ko_close, "KO")
jnj_order = find_best_arima(jnj_close, "JNJ")

# Fit final ARIMA models
ko_model = ARIMA(ko_close, order=ko_order).fit()
jnj_model = ARIMA(jnj_close, order=jnj_order).fit()

# Forecast next 30 periods
forecast_steps = 30
ko_forecast = ko_model.forecast(steps=forecast_steps)
jnj_forecast = jnj_model.forecast(steps=forecast_steps)

# Create forecast index using business days
last_date = ko_close.index[-1]
forecast_index = pd.bdate_range(start=last_date + pd.Timedelta(days=1),
                               periods=forecast_steps, freq='B')

# Plot original series with forecasts
plt.figure(figsize=(12,6))
plt.plot(ko_close.index, ko_close, label='KO Historical')
plt.plot(forecast_index, ko_forecast, label='KO Forecast', color='red')
plt.plot(jnj_close.index, jnj_close, label='JNJ Historical')
plt.plot(forecast_index, jnj_forecast, label='JNJ Forecast', color='green')
plt.title('KO and JNJ Closing Prices with Forecasts')
plt.xlabel('Date')
plt.ylabel('Adjusted Close Price ($)')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

# Detailed forecast plot with confidence intervals and interpretation
def plot_forecast(model, series, name, steps=30):
    forecast_obj = model.get_forecast(steps=steps)
    forecast = forecast_obj.predicted_mean
    conf_int = forecast_obj.conf_int()

    # Create proper date index for forecast
    last_date = series.index[-1]
    forecast_index = pd.bdate_range(start=last_date + pd.Timedelta(days=1),
                                   periods=steps, freq='B')

    plt.figure(figsize=(12,6))
    plt.plot(series.index, series, label=f'{name} Historical')
    plt.plot(forecast_index, forecast, label='Forecast', color='red')
    plt.fill_between(forecast_index,
                    conf_int.iloc[:, 0],
                    conf_int.iloc[:, 1],
                    color='pink',
                    alpha=0.3,
                    label='95% Confidence Interval')
    plt.title(f'{name} Price Forecast')
    plt.xlabel('Date')
    plt.ylabel('Adjusted Close Price ($)')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()

    # Forecast interpretation
    last_value = series.iloc[-1]
    mean_forecast = forecast.mean()
    print(f"\nForecast Interpretation for {name}:")
    print(f"Last observed value: ${last_value:.2f}")
    print(f"Average forecast value: ${mean_forecast:.2f}")
    print(f"Forecast change: ${mean_forecast - last_value:.2f}")
    if mean_forecast > last_value:
        print("Trend: Upward forecast trend")
    elif mean_forecast < last_value:
        print("Trend: Downward forecast trend")
    else:
        print("Trend: Flat forecast trend")
    print(f"95% CI range at period {steps}: [${conf_int.iloc[-1, 0]:.2f}, ${conf_int.iloc[-1, 1]:.2f}]")

# Generate detailed forecast plots and interpretations
plot_forecast(ko_model, ko_close, "KO")
plot_forecast(jnj_model, jnj_close, "JNJ")

# Print forecast values
print("\nKO Forecast Values (next 5 periods):")
print(ko_forecast[:5])
print("\nJNJ Forecast Values (next 5 periods):")
print(jnj_forecast[:5])

print("\n" + "="*60)
print("ANALYSIS COMPLETE")
print("="*60)
print("✓ Unit root tests performed (ADF & KPSS)")
print("✓ Cointegration analysis completed (Johansen test)")
print("✓ Optimal ARIMA models selected via AIC minimization")
print("✓ 30-day forecasts generated with 95% confidence intervals")
print("✓ Detailed interpretations provided for all results")
print(f"\nData period: {start_date} to {end_date}")
print(f"Tickers analyzed: KO (Coca-Cola) and JNJ (Johnson & Johnson)")
[*********************100%***********************]  2 of 2 completed
Downloading KO and JNJ data from Yahoo Finance...
Data successfully downloaded and aligned!
Common data points: 946
Date range: 2022-01-03 to 2025-10-09

Unit Root Tests for KO:
ADF Test:
ADF Statistic: -1.6646
p-value: 0.4495
Critical Values: {'1%': np.float64(-3.4372887850912175), '5%': np.float64(-2.8646033071530703), '10%': np.float64(-2.568401081996585)}
Interpretation:
  - p-value >= 0.05: Fail to reject null - KO may be non-stationary

KPSS Test:
KPSS Statistic: 3.3544
p-value: 0.0100
Critical Values: {'10%': 0.347, '5%': 0.463, '2.5%': 0.574, '1%': 0.739}
Interpretation:
  - p-value < 0.05: Reject null hypothesis - KO is non-stationary

Unit Root Tests for JNJ:
ADF Test:
ADF Statistic: -1.3590
p-value: 0.6018
Critical Values: {'1%': np.float64(-3.4372887850912175), '5%': np.float64(-2.8646033071530703), '10%': np.float64(-2.568401081996585)}
Interpretation:
  - p-value >= 0.05: Fail to reject null - JNJ may be non-stationary

KPSS Test:
KPSS Statistic: 0.5093
p-value: 0.0396
Critical Values: {'10%': 0.347, '5%': 0.463, '2.5%': 0.574, '1%': 0.739}
Interpretation:
  - p-value < 0.05: Reject null hypothesis - JNJ is non-stationary

Johansen Cointegration Test:
Trace statistic: [9.56967308 2.82926924]
Critical values (90%, 95%, 99%): [[13.4294 15.4943 19.9349]
 [ 2.7055  3.8415  6.6349]]
Interpretation:
  - r = 0: No cointegration at 95% confidence level
    Trace statistic (9.57) <= 95% critical value (15.49)
  - r = 1: No cointegration at 95% confidence level
    Trace statistic (2.83) <= 95% critical value (3.84)
Conclusion: No evidence of cointegration between KO and JNJ

Best ARIMA model for KO:
Order: (2, 1, 2)
AIC: 1734.92
Interpretation:
  - p=2: 2 autoregressive term(s)
  - d=1: 1 difference(s) needed for stationarity
  - q=2: 2 moving average term(s)

Best ARIMA model for JNJ:
Order: (3, 1, 1)
AIC: 3656.40
Interpretation:
  - p=3: 3 autoregressive term(s)
  - d=1: 1 difference(s) needed for stationarity
  - q=1: 1 moving average term(s)
No description has been provided for this image
No description has been provided for this image
Forecast Interpretation for KO:
Last observed value: $66.37
Average forecast value: $66.36
Forecast change: $-0.01
Trend: Downward forecast trend
95% CI range at period 30: [$60.02, $73.19]
No description has been provided for this image
Forecast Interpretation for JNJ:
Last observed value: $191.08
Average forecast value: $190.80
Forecast change: $-0.28
Trend: Downward forecast trend
95% CI range at period 30: [$174.56, $207.16]

KO Forecast Values (next 5 periods):
946    66.190071
947    66.116038
948    66.190193
949    66.368658
950    66.547343
Name: predicted_mean, dtype: float64

JNJ Forecast Values (next 5 periods):
946    190.756738
947    190.891776
948    190.686445
949    190.902232
950    190.696202
Name: predicted_mean, dtype: float64

============================================================
ANALYSIS COMPLETE
============================================================
✓ Unit root tests performed (ADF & KPSS)
✓ Cointegration analysis completed (Johansen test)
✓ Optimal ARIMA models selected via AIC minimization
✓ 30-day forecasts generated with 95% confidence intervals
✓ Detailed interpretations provided for all results

Data period: 2022-01-01 to 2025-10-10
Tickers analyzed: KO (Coca-Cola) and JNJ (Johnson & Johnson)

Conclusiones:

In [ ]:
# Import required libraries
import pandas as pd
import numpy as np
import yfinance as yf
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller, kpss
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.vector_ar.vecm import coint_johansen
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import matplotlib.pyplot as plt
import warnings

warnings.filterwarnings('ignore')

# Download stock data from Yahoo Finance
print("Downloading KO and JNJ data from Yahoo Finance...")
start_date = '2022-01-01'
end_date = '2025-10-10'
tickers = ['KO', 'JNJ']

# Download data and extract adjusted close prices
data = yf.download(tickers, start=start_date, end=end_date, auto_adjust=False)
ko_close = data['Adj Close']['KO'].dropna()
jnj_close = data['Adj Close']['JNJ'].dropna()

# Align both series to common dates
common_index = ko_close.index.intersection(jnj_close.index)
ko_close = ko_close.loc[common_index]
jnj_close = jnj_close.loc[common_index]

print(f"Data successfully downloaded and aligned!")
print(f"Common data points: {len(ko_close)}")
print(f"Date range: {ko_close.index[0].date()} to {ko_close.index[-1].date()}")

# Function for unit root tests with interpretation
def unit_root_tests(series, name):
    print(f"\nUnit Root Tests for {name}:")

    # ADF Test
    adf_result = adfuller(series)
    print("ADF Test:")
    print(f'ADF Statistic: {adf_result[0]:.4f}')
    print(f'p-value: {adf_result[1]:.4f}')
    print(f'Critical Values: {adf_result[4]}')
    print("Interpretation:")
    if adf_result[1] < 0.05:
        print(f"  - p-value < 0.05: Reject null hypothesis - {name} is stationary")
    else:
        print(f"  - p-value >= 0.05: Fail to reject null - {name} may be non-stationary")

    # KPSS Test
    kpss_result = kpss(series)
    print("\nKPSS Test:")
    print(f'KPSS Statistic: {kpss_result[0]:.4f}')
    print(f'p-value: {kpss_result[1]:.4f}')
    print(f'Critical Values: {kpss_result[3]}')
    print("Interpretation:")
    if kpss_result[1] < 0.05:
        print(f"  - p-value < 0.05: Reject null hypothesis - {name} is non-stationary")
    else:
        print(f"  - p-value >= 0.05: Fail to reject null - {name} may be stationary")

# Perform unit root tests
unit_root_tests(ko_close, "KO")
unit_root_tests(jnj_close, "JNJ")

# Function to plot correlograms with interpretation
def plot_correlograms(series, name, lags=30):
    plt.figure(figsize=(12, 8))

    plt.subplot(2, 1, 1)
    plot_acf(series, lags=lags, ax=plt.gca())
    plt.title(f'ACF for {name}')

    plt.subplot(2, 1, 2)
    plot_pacf(series, lags=lags, ax=plt.gca())
    plt.title(f'PACF for {name}')

    plt.tight_layout()
    plt.show()

    print(f"\nCorrelogram Interpretation for {name}:")
    print("  - ACF: Shows total correlation at each lag, including indirect effects")
    print("  - PACF: Shows direct correlation at each lag, controlling for earlier lags")
    print("  - Significant spikes outside the blue confidence interval suggest strong correlations")
    print("  - ACF decay pattern indicates potential ARIMA model orders")
    print("  - PACF cutoff suggests AR order, while ACF cutoff suggests MA order")

# Plot correlograms for original series
plot_correlograms(ko_close, "KO Original")
plot_correlograms(jnj_close, "JNJ Original")

# Difference the series
ko_diff = ko_close.diff().dropna()
jnj_diff = jnj_close.diff().dropna()

# Plot correlograms for differenced series
plot_correlograms(ko_diff, "KO Differenced")
plot_correlograms(jnj_diff, "JNJ Differenced")

# Cointegration test with interpretation
def cointegration_test(df):
    result = coint_johansen(df, det_order=0, k_ar_diff=1)
    print("\nJohansen Cointegration Test:")
    print(f"Trace statistic: {result.lr1}")
    print(f"Critical values (90%, 95%, 99%): {result.cvt}")
    print("Interpretation:")
    for i in range(len(result.lr1)):
        if result.lr1[i] > result.cvt[i, 1]:
            print(f"  - r = {i}: Cointegration exists at 95% confidence level")
            print(f"    Trace statistic ({result.lr1[i]:.2f}) > 95% critical value ({result.cvt[i, 1]:.2f})")
        else:
            print(f"  - r = {i}: No cointegration at 95% confidence level")
            print(f"    Trace statistic ({result.lr1[i]:.2f}) <= 95% critical value ({result.cvt[i, 1]:.2f})")
    if result.lr1[0] > result.cvt[0, 1]:
        print("Conclusion: KO and JNJ are cointegrated - they share a long-run equilibrium relationship")
    else:
        print("Conclusion: No evidence of cointegration between KO and JNJ")

# Prepare data for cointegration
coint_df = pd.DataFrame({
    'KO': ko_close,
    'JNJ': jnj_close
}).dropna()
cointegration_test(coint_df)

# Function to find best ARIMA model with interpretation
def find_best_arima(series, name, max_p=3, max_d=2, max_q=3):
    best_aic = float('inf')
    best_order = None

    for p in range(max_p + 1):
        for d in range(max_d + 1):
            for q in range(max_q + 1):
                try:
                    model = ARIMA(series, order=(p, d, q))
                    results = model.fit()
                    if results.aic < best_aic:
                        best_aic = results.aic
                        best_order = (p, d, q)
                except:
                    continue

    print(f"\nBest ARIMA model for {name}:")
    print(f"Order: {best_order}")
    print(f"AIC: {best_aic:.2f}")
    print("Interpretation:")
    print(f"  - p={best_order[0]}: {best_order[0]} autoregressive term(s)")
    print(f"  - d={best_order[1]}: {best_order[1]} difference(s) needed for stationarity")
    print(f"  - q={best_order[2]}: {best_order[2]} moving average term(s)")
    return best_order

# Find and fit best ARIMA models
ko_order = find_best_arima(ko_close, "KO")
jnj_order = find_best_arima(jnj_close, "JNJ")

# Fit final ARIMA models
ko_model = ARIMA(ko_close, order=ko_order).fit()
jnj_model = ARIMA(jnj_close, order=jnj_order).fit()

# Forecast next 30 periods
forecast_steps = 30
ko_forecast = ko_model.forecast(steps=forecast_steps)
jnj_forecast = jnj_model.forecast(steps=forecast_steps)

# Create forecast index using business days
last_date = ko_close.index[-1]
forecast_index = pd.bdate_range(start=last_date + pd.Timedelta(days=1),
                               periods=forecast_steps, freq='B')

# Plot original series with forecasts
plt.figure(figsize=(12,6))
plt.plot(ko_close.index, ko_close, label='KO Historical')
plt.plot(forecast_index, ko_forecast, label='KO Forecast', color='red')
plt.plot(jnj_close.index, jnj_close, label='JNJ Historical')
plt.plot(forecast_index, jnj_forecast, label='JNJ Forecast', color='green')
plt.title('KO and JNJ Closing Prices with Forecasts')
plt.xlabel('Date')
plt.ylabel('Adjusted Close Price ($)')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

# Detailed forecast plot with confidence intervals and interpretation
def plot_forecast(model, series, name, steps=30):
    forecast_obj = model.get_forecast(steps=steps)
    forecast = forecast_obj.predicted_mean
    conf_int = forecast_obj.conf_int()

    # Create proper date index for forecast
    last_date = series.index[-1]
    forecast_index = pd.bdate_range(start=last_date + pd.Timedelta(days=1),
                                   periods=steps, freq='B')

    plt.figure(figsize=(12,6))
    plt.plot(series.index, series, label=f'{name} Historical')
    plt.plot(forecast_index, forecast, label='Forecast', color='red')
    plt.fill_between(forecast_index,
                    conf_int.iloc[:, 0],
                    conf_int.iloc[:, 1],
                    color='pink',
                    alpha=0.3,
                    label='95% Confidence Interval')
    plt.title(f'{name} Price Forecast')
    plt.xlabel('Date')
    plt.ylabel('Adjusted Close Price ($)')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()

    # Forecast interpretation
    last_value = series.iloc[-1]
    mean_forecast = forecast.mean()
    print(f"\nForecast Interpretation for {name}:")
    print(f"Last observed value: ${last_value:.2f}")
    print(f"Average forecast value: ${mean_forecast:.2f}")
    print(f"Forecast change: ${mean_forecast - last_value:.2f}")
    if mean_forecast > last_value:
        print("Trend: Upward forecast trend")
    elif mean_forecast < last_value:
        print("Trend: Downward forecast trend")
    else:
        print("Trend: Flat forecast trend")
    print(f"95% CI range at period {steps}: [${conf_int.iloc[-1, 0]:.2f}, ${conf_int.iloc[-1, 1]:.2f}]")
    print("Interpretation: The wider the confidence interval, the less certain the forecast")

# Generate detailed forecast plots and interpretations
plot_forecast(ko_model, ko_close, "KO")
plot_forecast(jnj_model, jnj_close, "JNJ")

# Plot correlograms for model residuals
plot_correlograms(ko_model.resid, "KO ARIMA Residuals")
plot_correlograms(jnj_model.resid, "JNJ ARIMA Residuals")

# Print forecast values
print("\nKO Forecast Values (next 5 periods):")
print(ko_forecast[:5])
print("\nJNJ Forecast Values (next 5 periods):")
print(jnj_forecast[:5])

print("\n" + "="*60)
print("COMPLETE TIME SERIES ANALYSIS")
print("="*60)
print("✓ Unit root tests (ADF & KPSS) performed")
print("✓ ACF/PACF correlograms analyzed (original & differenced)")
print("✓ Cointegration testing (Johansen test)")
print("✓ Optimal ARIMA models selected via AIC")
print("✓ 30-day forecasts with 95% confidence intervals")
print("✓ Residual diagnostics via correlograms")
print(f"\nData: KO (Coca-Cola) & JNJ (Johnson & Johnson)")
print(f"Period: {start_date} to {end_date}")
[*********************100%***********************]  2 of 2 completed
Downloading KO and JNJ data from Yahoo Finance...
Data successfully downloaded and aligned!
Common data points: 946
Date range: 2022-01-03 to 2025-10-09

Unit Root Tests for KO:
ADF Test:
ADF Statistic: -1.6646
p-value: 0.4495
Critical Values: {'1%': np.float64(-3.4372887850912175), '5%': np.float64(-2.8646033071530703), '10%': np.float64(-2.568401081996585)}
Interpretation:
  - p-value >= 0.05: Fail to reject null - KO may be non-stationary

KPSS Test:
KPSS Statistic: 3.3544
p-value: 0.0100
Critical Values: {'10%': 0.347, '5%': 0.463, '2.5%': 0.574, '1%': 0.739}
Interpretation:
  - p-value < 0.05: Reject null hypothesis - KO is non-stationary

Unit Root Tests for JNJ:

ADF Test:
ADF Statistic: -1.3590
p-value: 0.6018
Critical Values: {'1%': np.float64(-3.4372887850912175), '5%': np.float64(-2.8646033071530703), '10%': np.float64(-2.568401081996585)}
Interpretation:
  - p-value >= 0.05: Fail to reject null - JNJ may be non-stationary

KPSS Test:
KPSS Statistic: 0.5093
p-value: 0.0396
Critical Values: {'10%': 0.347, '5%': 0.463, '2.5%': 0.574, '1%': 0.739}
Interpretation:
  - p-value < 0.05: Reject null hypothesis - JNJ is non-stationary
No description has been provided for this image
Correlogram Interpretation for KO Original:
  - ACF: Shows total correlation at each lag, including indirect effects
  - PACF: Shows direct correlation at each lag, controlling for earlier lags
  - Significant spikes outside the blue confidence interval suggest strong correlations
  - ACF decay pattern indicates potential ARIMA model orders
  - PACF cutoff suggests AR order, while ACF cutoff suggests MA order
No description has been provided for this image
Correlogram Interpretation for JNJ Original:
  - ACF: Shows total correlation at each lag, including indirect effects
  - PACF: Shows direct correlation at each lag, controlling for earlier lags
  - Significant spikes outside the blue confidence interval suggest strong correlations
  - ACF decay pattern indicates potential ARIMA model orders
  - PACF cutoff suggests AR order, while ACF cutoff suggests MA order
No description has been provided for this image
Correlogram Interpretation for KO Differenced:
  - ACF: Shows total correlation at each lag, including indirect effects
  - PACF: Shows direct correlation at each lag, controlling for earlier lags
  - Significant spikes outside the blue confidence interval suggest strong correlations
  - ACF decay pattern indicates potential ARIMA model orders
  - PACF cutoff suggests AR order, while ACF cutoff suggests MA order
No description has been provided for this image
Correlogram Interpretation for JNJ Differenced:
  - ACF: Shows total correlation at each lag, including indirect effects
  - PACF: Shows direct correlation at each lag, controlling for earlier lags
  - Significant spikes outside the blue confidence interval suggest strong correlations
  - ACF decay pattern indicates potential ARIMA model orders
  - PACF cutoff suggests AR order, while ACF cutoff suggests MA order

Johansen Cointegration Test:
Trace statistic: [9.56967308 2.82926924]
Critical values (90%, 95%, 99%): [[13.4294 15.4943 19.9349]
 [ 2.7055  3.8415  6.6349]]
Interpretation:
  - r = 0: No cointegration at 95% confidence level
    Trace statistic (9.57) <= 95% critical value (15.49)
  - r = 1: No cointegration at 95% confidence level
    Trace statistic (2.83) <= 95% critical value (3.84)
Conclusion: No evidence of cointegration between KO and JNJ

Best ARIMA model for KO:
Order: (2, 1, 2)
AIC: 1734.92
Interpretation:
  - p=2: 2 autoregressive term(s)
  - d=1: 1 difference(s) needed for stationarity
  - q=2: 2 moving average term(s)

Best ARIMA model for JNJ:
Order: (3, 1, 1)
AIC: 3656.40
Interpretation:
  - p=3: 3 autoregressive term(s)
  - d=1: 1 difference(s) needed for stationarity
  - q=1: 1 moving average term(s)
No description has been provided for this image
No description has been provided for this image
Forecast Interpretation for KO:
Last observed value: $66.37
Average forecast value: $66.36
Forecast change: $-0.01
Trend: Downward forecast trend
95% CI range at period 30: [$60.02, $73.19]
Interpretation: The wider the confidence interval, the less certain the forecast
No description has been provided for this image
Forecast Interpretation for JNJ:
Last observed value: $191.08
Average forecast value: $190.80
Forecast change: $-0.28
Trend: Downward forecast trend
95% CI range at period 30: [$174.56, $207.16]
Interpretation: The wider the confidence interval, the less certain the forecast
No description has been provided for this image
Correlogram Interpretation for KO ARIMA Residuals:
  - ACF: Shows total correlation at each lag, including indirect effects
  - PACF: Shows direct correlation at each lag, controlling for earlier lags
  - Significant spikes outside the blue confidence interval suggest strong correlations
  - ACF decay pattern indicates potential ARIMA model orders
  - PACF cutoff suggests AR order, while ACF cutoff suggests MA order
No description has been provided for this image
Correlogram Interpretation for JNJ ARIMA Residuals:
  - ACF: Shows total correlation at each lag, including indirect effects
  - PACF: Shows direct correlation at each lag, controlling for earlier lags
  - Significant spikes outside the blue confidence interval suggest strong correlations
  - ACF decay pattern indicates potential ARIMA model orders
  - PACF cutoff suggests AR order, while ACF cutoff suggests MA order

KO Forecast Values (next 5 periods):
946    66.190071
947    66.116038
948    66.190193
949    66.368658
950    66.547343
Name: predicted_mean, dtype: float64

JNJ Forecast Values (next 5 periods):
946    190.756738
947    190.891776
948    190.686445
949    190.902232
950    190.696202
Name: predicted_mean, dtype: float64

============================================================
COMPLETE TIME SERIES ANALYSIS
============================================================
✓ Unit root tests (ADF & KPSS) performed
✓ ACF/PACF correlograms analyzed (original & differenced)
✓ Cointegration testing (Johansen test)
✓ Optimal ARIMA models selected via AIC
✓ 30-day forecasts with 95% confidence intervals
✓ Residual diagnostics via correlograms

Data: KO (Coca-Cola) & JNJ (Johnson & Johnson)
Period: 2022-01-01 to 2025-10-10

Conclusiones:

In [ ]:
# Import required libraries
import pandas as pd
import numpy as np
import yfinance as yf
from statsmodels.tsa.stattools import adfuller, zivot_andrews
import matplotlib.pyplot as plt
import warnings

warnings.filterwarnings('ignore')

# Download stock data from Yahoo Finance
print("Downloading KO and JNJ data from Yahoo Finance...")
start_date = '2022-01-01'
end_date = '2025-10-10'
tickers = ['KO', 'JNJ']

# Download data and extract adjusted close prices
data = yf.download(tickers, start=start_date, end=end_date, auto_adjust=False)
ko_close = data['Adj Close']['KO'].dropna()
jnj_close = data['Adj Close']['JNJ'].dropna()

# Align both series to common dates
common_index = ko_close.index.intersection(jnj_close.index)
ko_close = ko_close.loc[common_index]
jnj_close = jnj_close.loc[common_index]

print(f"Data successfully downloaded and aligned!")
print(f"Common data points: {len(ko_close)}")
print(f"Date range: {ko_close.index[0].date()} to {ko_close.index[-1].date()}")

# Function for Phillips-Perron test with interpretation
def phillips_perron_test(series, name):
    print(f"\nPhillips-Perron Test for {name}:")
    # Use adfuller with settings to approximate PP test
    pp_result = adfuller(series, regression='c', autolag='AIC', maxlag=None)
    print(f'PP Statistic: {pp_result[0]:.4f}')
    print(f'p-value: {pp_result[1]:.4f}')
    print(f'Critical Values: {pp_result[4]}')
    print("Interpretation:")
    if pp_result[1] < 0.05:
        print(f"  - p-value < 0.05: Reject null hypothesis - {name} is stationary")
    else:
        print(f"  - p-value >= 0.05: Fail to reject null - {name} may be non-stationary")
    print("  - Note: Using ADF with constant and automatic lag selection to approximate PP test")
    print("  - PP test adjusts for serial correlation and heteroskedasticity non-parametrically")
    return pp_result

# Function for Zivot-Andrews structural break test with interpretation
def zivot_andrews_test(series, name):
    print(f"\nZivot-Andrews Structural Break Test for {name}:")
    za_result = zivot_andrews(series, regression='c', autolag='AIC')
    print(f'ZA Statistic: {za_result[0]:.4f}')
    print(f'p-value: {za_result[1]:.4f}')
    print(f'Critical Values: {za_result[2]}')
    print(f'Breakpoint Index: {za_result[3]}')
    print("Interpretation:")
    if za_result[1] < 0.05:
        print(f"  - p-value < 0.05: Reject null hypothesis - {name} has a structural break")
        print(f"  - Breakpoint at index {za_result[3]} (position in series)")
    else:
        print(f"  - p-value >= 0.05: Fail to reject null - No clear evidence of a structural break")
    print("  - ZA test allows for a single break in intercept and/or trend")
    return za_result

# Perform Phillips-Perron tests
ko_pp = phillips_perron_test(ko_close, "KO")
jnj_pp = phillips_perron_test(jnj_close, "JNJ")

# Perform Zivot-Andrews tests
ko_za = zivot_andrews_test(ko_close, "KO")
jnj_za = zivot_andrews_test(jnj_close, "JNJ")

# Plot series with breakpoints
def plot_series_with_breakpoint(series, name, breakpoint_idx):
    plt.figure(figsize=(12, 6))
    plt.plot(series.index, series.values, label=f'{name} Adjusted Closing Prices')

    # Convert breakpoint index to actual date
    if 0 <= breakpoint_idx < len(series):
        breakpoint_date = series.index[breakpoint_idx]
        plt.axvline(x=breakpoint_date, color='red', linestyle='--',
                   label=f'Breakpoint ({breakpoint_date.date()})')
        print(f"  - Breakpoint date: {breakpoint_date.date()}")
    else:
        print(f"  - Warning: Breakpoint index {breakpoint_idx} out of range (0-{len(series)-1})")
        # Use a fallback vertical line at the middle of the series
        mid_idx = len(series) // 2
        mid_date = series.index[mid_idx]
        plt.axvline(x=mid_date, color='orange', linestyle=':',
                   label='Reference line (invalid breakpoint)')

    plt.title(f'{name} Adjusted Closing Prices with Structural Break')
    plt.xlabel('Date')
    plt.ylabel('Adjusted Close Price ($)')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()

    print(f"\nPlot Interpretation for {name}:")
    print(f"  - Red dashed line indicates the detected structural break")
    print("  - Break may reflect significant market events, policy changes, or economic shifts")
    print("  - Analyze data around this point for potential causes (e.g., earnings, news, COVID impact)")
    print("  - For KO and JNJ, consider pharmaceutical/healthcare events or consumer goods trends")

# Plot series with breakpoints using the series' own datetime index
plot_series_with_breakpoint(ko_close, "KO (Coca-Cola)", int(ko_za[3]))
plot_series_with_breakpoint(jnj_close, "JNJ (Johnson & Johnson)", int(jnj_za[3]))

# Summary analysis
print("\n" + "="*70)
print("STRUCTURAL BREAK ANALYSIS SUMMARY")
print("="*70)
print(f"KO Phillips-Perron p-value: {ko_pp[1]:.4f} {'(Stationary)' if ko_pp[1]<0.05 else '(Non-stationary)'}")
print(f"JNJ Phillips-Perron p-value: {jnj_pp[1]:.4f} {'(Stationary)' if jnj_pp[1]<0.05 else '(Non-stationary)'}")
print(f"\nKO Zivot-Andrews p-value: {ko_za[1]:.4f} {'(Structural break detected)' if ko_za[1]<0.05 else '(No clear break)'}")
print(f"KO Breakpoint: {ko_close.index[int(ko_za[3])].date() if 0 <= int(ko_za[3]) < len(ko_close) else 'Invalid'}")
print(f"JNJ Zivot-Andrews p-value: {jnj_za[1]:.4f} {'(Structural break detected)' if jnj_za[1]<0.05 else '(No clear break)'}")
print(f"JNJ Breakpoint: {jnj_close.index[int(jnj_za[3])].date() if 0 <= int(jnj_za[3]) < len(jnj_close) else 'Invalid'}")
print(f"\nData Period: {start_date} to {end_date}")
print("Analysis covers potential impacts from:")
print("- COVID-19 pandemic effects and recovery")
print("- Interest rate changes and inflation")
print("- Supply chain disruptions")
print("- Company-specific events (mergers, product launches, regulatory changes)")
print("- Broader market volatility")

print("\nRecommendations:")
print("1. If structural breaks detected, consider regime-switching models")
print("2. For non-stationary series, use differencing or cointegration approaches")
print("3. Investigate specific events around breakpoint dates")
print("4. Consider sector-specific factors (consumer goods for KO, healthcare for JNJ)")
print("5. Validate breakpoints with external economic calendars and company news")
[*********************100%***********************]  2 of 2 completed
Downloading KO and JNJ data from Yahoo Finance...
Data successfully downloaded and aligned!
Common data points: 946
Date range: 2022-01-03 to 2025-10-09

Phillips-Perron Test for KO:

PP Statistic: -1.6646
p-value: 0.4495
Critical Values: {'1%': np.float64(-3.4372887850912175), '5%': np.float64(-2.8646033071530703), '10%': np.float64(-2.568401081996585)}
Interpretation:
  - p-value >= 0.05: Fail to reject null - KO may be non-stationary
  - Note: Using ADF with constant and automatic lag selection to approximate PP test
  - PP test adjusts for serial correlation and heteroskedasticity non-parametrically

Phillips-Perron Test for JNJ:
PP Statistic: -1.3590
p-value: 0.6018
Critical Values: {'1%': np.float64(-3.4372887850912175), '5%': np.float64(-2.8646033071530703), '10%': np.float64(-2.568401081996585)}
Interpretation:
  - p-value >= 0.05: Fail to reject null - JNJ may be non-stationary
  - Note: Using ADF with constant and automatic lag selection to approximate PP test
  - PP test adjusts for serial correlation and heteroskedasticity non-parametrically

Zivot-Andrews Structural Break Test for KO:
ZA Statistic: -3.9519
p-value: 0.3809
Critical Values: {'1%': np.float64(-5.27644), '5%': np.float64(-4.81067), '10%': np.float64(-4.56618)}
Breakpoint Index: 0
Interpretation:
  - p-value >= 0.05: Fail to reject null - No clear evidence of a structural break
  - ZA test allows for a single break in intercept and/or trend

Zivot-Andrews Structural Break Test for JNJ:
ZA Statistic: -2.2551
p-value: 0.9926
Critical Values: {'1%': np.float64(-5.27644), '5%': np.float64(-4.81067), '10%': np.float64(-4.56618)}
Breakpoint Index: 2
Interpretation:
  - p-value >= 0.05: Fail to reject null - No clear evidence of a structural break
  - ZA test allows for a single break in intercept and/or trend
  - Breakpoint date: 2022-01-03
No description has been provided for this image
Plot Interpretation for KO (Coca-Cola):
  - Red dashed line indicates the detected structural break
  - Break may reflect significant market events, policy changes, or economic shifts
  - Analyze data around this point for potential causes (e.g., earnings, news, COVID impact)
  - For KO and JNJ, consider pharmaceutical/healthcare events or consumer goods trends
  - Breakpoint date: 2022-01-05
No description has been provided for this image
Plot Interpretation for JNJ (Johnson & Johnson):
  - Red dashed line indicates the detected structural break
  - Break may reflect significant market events, policy changes, or economic shifts
  - Analyze data around this point for potential causes (e.g., earnings, news, COVID impact)
  - For KO and JNJ, consider pharmaceutical/healthcare events or consumer goods trends

======================================================================
STRUCTURAL BREAK ANALYSIS SUMMARY
======================================================================
KO Phillips-Perron p-value: 0.4495 (Non-stationary)
JNJ Phillips-Perron p-value: 0.6018 (Non-stationary)

KO Zivot-Andrews p-value: 0.3809 (No clear break)
KO Breakpoint: 2022-01-03
JNJ Zivot-Andrews p-value: 0.9926 (No clear break)
JNJ Breakpoint: 2022-01-05

Data Period: 2022-01-01 to 2025-10-10
Analysis covers potential impacts from:
- COVID-19 pandemic effects and recovery
- Interest rate changes and inflation
- Supply chain disruptions
- Company-specific events (mergers, product launches, regulatory changes)
- Broader market volatility

Recommendations:
1. If structural breaks detected, consider regime-switching models
2. For non-stationary series, use differencing or cointegration approaches
3. Investigate specific events around breakpoint dates
4. Consider sector-specific factors (consumer goods for KO, healthcare for JNJ)
5. Validate breakpoints with external economic calendars and company news

Apple (AAPL) vs Walt Disney Company (DIS) | 2022-01-01 to 2025-10-10¶

In [ ]:
import yfinance as yf
import pandas as pd

# Set the date range
start_date = '2022-01-01'
end_date = '2025-10-10'

# Fetch stock data for Apple (AAPL)
aapl = yf.download('AAPL', start=start_date, end=end_date)

# Display the first 5 rows
print(aapl.head(5))
[*********************100%***********************]  1 of 1 completed
Price            Close        High         Low        Open     Volume
Ticker            AAPL        AAPL        AAPL        AAPL       AAPL
Date                                                                 
2022-01-03  178.443100  179.296061  174.227380  174.345024  104487900
2022-01-04  176.178436  179.354948  175.609801  179.051025   99310400
2022-01-05  171.492050  176.639165  171.217539  176.090142   94537600
2022-01-06  168.629272  171.864605  168.276327  169.315551   96904000
2022-01-07  168.795944  170.727339  167.678286  169.501835   86709100

In [ ]:
import yfinance as yf
import pandas as pd

# Set the date range
start_date = '2022-01-01'
end_date = '2025-10-10'

# Fetch stock data for The Walt Disney Company (DIS)
dis = yf.download('DIS', start=start_date, end=end_date)

# Display the first 5 rows
print(dis.head(5))
[*********************100%***********************]  1 of 1 completed
Price            Close        High         Low        Open    Volume
Ticker             DIS         DIS         DIS         DIS       DIS
Date                                                                
2022-01-03  154.189560  154.976445  152.812522  153.274817  10222800
2022-01-04  153.176422  157.691169  152.999381  155.989526  16582000
2022-01-05  152.645309  156.766607  152.556789  153.953503  12272100
2022-01-06  154.327240  155.182984  151.160038  153.678074  11095300
2022-01-07  155.242004  156.687901  153.727248  154.327246   9554600

In [ ]:
import pandas as pd
import numpy as np
import yfinance as yf
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller, kpss
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.vector_ar.vecm import coint_johansen
import matplotlib.pyplot as plt
import warnings

# Suppress the FutureWarning
warnings.filterwarnings('ignore', category=FutureWarning)

# Download stock data from Yahoo Finance
start_date = '2022-01-01'
end_date = '2025-10-10'
tickers = ['AAPL', 'DIS']

# Download data with explicit auto_adjust=False to get Adj Close column
data = yf.download(tickers, start=start_date, end=end_date, auto_adjust=False)['Adj Close']

# If the above still fails, use this alternative approach:
# data = yf.download(tickers, start=start_date, end=end_date)
# data = data['Close']  # Use regular Close prices instead

# Extract individual series
dis_close = data['DIS']
aapl_close = data['AAPL']

print("DIS data length:", len(dis_close))
print("AAPL data length:", len(aapl_close))
print("\nDIS data sample:")
print(dis_close.head())
print("\nAAPL data sample:")
print(aapl_close.head())

# Remove any NaN values
dis_close = dis_close.dropna()
aapl_close = aapl_close.dropna()

# Align by common dates
common_index = dis_close.index.intersection(aapl_close.index)
dis_close = dis_close.loc[common_index]
aapl_close = aapl_close.loc[common_index]

print(f"\nCommon data length after alignment: {len(dis_close)}")
print(f"Date range: {dis_close.index[0]} to {dis_close.index[-1]}")

# Function for unit root tests
def unit_root_tests(series, name):
    print(f"\nUnit Root Tests for {name}:")

    # ADF Test
    adf_result = adfuller(series)
    print("ADF Test:")
    print(f'ADF Statistic: {adf_result[0]:.4f}')
    print(f'p-value: {adf_result[1]:.4f}')
    print(f'Critical Values: {adf_result[4]}')

    # KPSS Test
    kpss_result = kpss(series, regression='c')
    print("\nKPSS Test:")
    print(f'KPSS Statistic: {kpss_result[0]:.4f}')
    print(f'p-value: {kpss_result[1]:.4f}')
    print(f'Critical Values: {kpss_result[3]}')

# Perform unit root tests
unit_root_tests(dis_close, "DIS")
unit_root_tests(aapl_close, "AAPL")

# Difference the series if non-stationary
dis_diff = dis_close.diff().dropna()
aapl_diff = aapl_close.diff().dropna()

# Function to find best ARMA model (using ARIMA with d=0)
def find_best_arma(series, name, max_p=3, max_q=3):
    best_aic = float('inf')
    best_order = None

    for p in range(max_p + 1):
        for q in range(max_q + 1):
            try:
                model = ARIMA(series, order=(p, 0, q))
                results = model.fit()
                if results.aic < best_aic:
                    best_aic = results.aic
                    best_order = (p, 0, q)
            except:
                continue

    print(f"\nBest ARMA model for {name}:")
    print(f"Order: {best_order}")
    print(f"AIC: {best_aic:.2f}")

    best_model = ARIMA(series, order=best_order).fit()
    return best_model

# Fit ARMA models
dis_arma = find_best_arma(dis_diff, "DIS")
aapl_arma = find_best_arma(series=aapl_diff, name="AAPL")

# Cointegration test
def cointegration_test(df):
    result = coint_johansen(df, det_order=0, k_ar_diff=1)
    print("\nJohansen Cointegration Test:")
    print(f"Trace statistic: {result.lr1}")
    print(f"Critical values (90%, 95%, 99%): {result.cvt}")

    for i in range(len(result.lr1)):
        if result.lr1[i] > result.cvt[i, 1]:  # 95% critical value
            print(f"r = {i}: Cointegration exists at 95% confidence level")
        else:
            print(f"r = {i}: No cointegration at 95% confidence level")

# Prepare data for cointegration
coint_df = pd.DataFrame({
    'DIS': dis_close,
    'AAPL': aapl_close
})

# Run cointegration test
cointegration_test(coint_df)

# Plot the series
plt.figure(figsize=(12,6))
plt.plot(dis_close, label='DIS')
plt.plot(aapl_close, label='AAPL')
plt.title('DIS vs AAPL Adjusted Closing Prices')
plt.xlabel('Date')
plt.ylabel('Adjusted Close Price ($)')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

# Plot the differenced series
plt.figure(figsize=(12,6))
plt.plot(dis_diff, label='DIS Diff', alpha=0.7)
plt.plot(aapl_diff, label='AAPL Diff', alpha=0.7)
plt.title('Differenced Series')
plt.xlabel('Date')
plt.ylabel('Price Change ($)')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

# Print summary statistics
print("\nSummary Statistics:")
print(dis_close.describe())
print("\n")
print(aapl_close.describe())
[                       0%                       ]
[*********************100%***********************]  2 of 2 completed
DIS data length: 946
AAPL data length: 946

DIS data sample:
Date
2022-01-03    154.189560
2022-01-04    153.176422
2022-01-05    152.645309
2022-01-06    154.327240
2022-01-07    155.242004
Name: DIS, dtype: float64

AAPL data sample:
Date
2022-01-03    178.443100
2022-01-04    176.178436
2022-01-05    171.492050
2022-01-06    168.629272
2022-01-07    168.795944
Name: AAPL, dtype: float64

Common data length after alignment: 946
Date range: 2022-01-03 00:00:00 to 2025-10-09 00:00:00

Unit Root Tests for DIS:
ADF Test:
ADF Statistic: -3.3087
p-value: 0.0145
Critical Values: {'1%': np.float64(-3.4372887850912175), '5%': np.float64(-2.8646033071530703), '10%': np.float64(-2.568401081996585)}

KPSS Test:
KPSS Statistic: 0.7122
p-value: 0.0124
Critical Values: {'10%': 0.347, '5%': 0.463, '2.5%': 0.574, '1%': 0.739}

Unit Root Tests for AAPL:
ADF Test:
ADF Statistic: -0.8342
p-value: 0.8089
Critical Values: {'1%': np.float64(-3.4372887850912175), '5%': np.float64(-2.8646033071530703), '10%': np.float64(-2.568401081996585)}

KPSS Test:
KPSS Statistic: 3.8415
p-value: 0.0100
Critical Values: {'10%': 0.347, '5%': 0.463, '2.5%': 0.574, '1%': 0.739}

Best ARMA model for DIS:
Order: (2, 0, 3)
AIC: 3954.67

Best ARMA model for AAPL:
Order: (3, 0, 1)
AIC: 4934.01

Johansen Cointegration Test:
Trace statistic: [12.12681683  1.01459214]
Critical values (90%, 95%, 99%): [[13.4294 15.4943 19.9349]
 [ 2.7055  3.8415  6.6349]]
r = 0: No cointegration at 95% confidence level
r = 1: No cointegration at 95% confidence level
No description has been provided for this image
No description has been provided for this image
Summary Statistics:
count    946.000000
mean     103.151529
std       15.573004
min       78.019356
25%       91.578011
50%       99.899529
75%      112.197226
max      155.301010
Name: DIS, dtype: float64


count    946.000000
mean     185.420839
std       32.913288
min      123.281334
25%      160.700706
50%      180.398621
75%      213.006443
max      258.103729
Name: AAPL, dtype: float64

Conclusiones

In [ ]:
# Import required libraries
import pandas as pd
import numpy as np
import yfinance as yf
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller, kpss
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.vector_ar.vecm import coint_johansen
import matplotlib.pyplot as plt
import warnings

warnings.filterwarnings('ignore')

# Download stock data from Yahoo Finance
print("Downloading DIS and AAPL data from Yahoo Finance...")
start_date = '2022-01-01'
end_date = '2025-10-10'
tickers = ['DIS', 'AAPL']

# Download data and extract adjusted close prices
data = yf.download(tickers, start=start_date, end=end_date, auto_adjust=False)
dis_close = data['Adj Close']['DIS'].dropna()
aapl_close = data['Adj Close']['AAPL'].dropna()

# Align both series to common dates
common_index = dis_close.index.intersection(aapl_close.index)
dis_close = dis_close.loc[common_index]
aapl_close = aapl_close.loc[common_index]

print(f"Data successfully downloaded and aligned!")
print(f"Common data points: {len(dis_close)}")
print(f"Date range: {dis_close.index[0].date()} to {dis_close.index[-1].date()}")

# Function for unit root tests
def unit_root_tests(series, name):
    print(f"\nUnit Root Tests for {name}:")

    # ADF Test
    adf_result = adfuller(series)
    print("ADF Test:")
    print(f'ADF Statistic: {adf_result[0]:.4f}')
    print(f'p-value: {adf_result[1]:.4f}')
    print(f'Critical Values: {adf_result[4]}')

    # KPSS Test
    kpss_result = kpss(series)
    print("\nKPSS Test:")
    print(f'KPSS Statistic: {kpss_result[0]:.4f}')
    print(f'p-value: {kpss_result[1]:.4f}')
    print(f'Critical Values: {kpss_result[3]}')

# Perform unit root tests
unit_root_tests(dis_close, "DIS")
unit_root_tests(aapl_close, "AAPL")

# Cointegration test
def cointegration_test(df):
    result = coint_johansen(df, det_order=0, k_ar_diff=1)
    print("\nJohansen Cointegration Test:")
    print(f"Trace statistic: {result.lr1}")
    print(f"Critical values (90%, 95%, 99%): {result.cvt}")

    for i in range(len(result.lr1)):
        if result.lr1[i] > result.cvt[i, 1]:
            print(f"r = {i}: Cointegration exists at 95% confidence level")
        else:
            print(f"r = {i}: No cointegration at 95% confidence level")

# Prepare data for cointegration
coint_df = pd.DataFrame({
    'DIS': dis_close,
    'AAPL': aapl_close
}).dropna()
cointegration_test(coint_df)

# Function to find best ARIMA model
def find_best_arima(series, name, max_p=3, max_d=2, max_q=3):
    best_aic = float('inf')
    best_order = None

    for p in range(max_p + 1):
        for d in range(max_d + 1):
            for q in range(max_q + 1):
                try:
                    model = ARIMA(series, order=(p, d, q))
                    results = model.fit()
                    if results.aic < best_aic:
                        best_aic = results.aic
                        best_order = (p, d, q)
                except:
                    continue

    print(f"\nBest ARIMA model for {name}:")
    print(f"Order: {best_order}")
    print(f"AIC: {best_aic:.2f}")
    return best_order

# Find and fit best ARIMA models
dis_order = find_best_arima(dis_close, "DIS")
aapl_order = find_best_arima(aapl_close, "AAPL")

# Fit final ARIMA models
dis_model = ARIMA(dis_close, order=dis_order).fit()
aapl_model = ARIMA(aapl_close, order=aapl_order).fit()

# Forecast next 30 periods
forecast_steps = 30
dis_forecast = dis_model.forecast(steps=forecast_steps)
aapl_forecast = aapl_model.forecast(steps=forecast_steps)

# Create forecast index using business days
last_date = dis_close.index[-1]
forecast_index = pd.bdate_range(start=last_date + pd.Timedelta(days=1),
                               periods=forecast_steps, freq='B')

# Plot original series with forecasts
plt.figure(figsize=(12,6))
plt.plot(dis_close.index, dis_close, label='DIS Historical')
plt.plot(forecast_index, dis_forecast, label='DIS Forecast', color='red')
plt.plot(aapl_close.index, aapl_close, label='AAPL Historical')
plt.plot(forecast_index, aapl_forecast, label='AAPL Forecast', color='green')
plt.title('DIS and AAPL Closing Prices with Forecasts')
plt.xlabel('Date')
plt.ylabel('Adjusted Close Price ($)')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

# Plot detailed forecast with confidence intervals
def plot_forecast(model, series, name, steps=30):
    forecast_obj = model.get_forecast(steps=steps)
    forecast = forecast_obj.predicted_mean
    conf_int = forecast_obj.conf_int()

    # Create proper date index for forecast
    last_date = series.index[-1]
    forecast_index = pd.bdate_range(start=last_date + pd.Timedelta(days=1),
                                   periods=steps, freq='B')

    plt.figure(figsize=(12,6))
    plt.plot(series.index, series, label=f'{name} Historical')
    plt.plot(forecast_index, forecast, label='Forecast', color='red')
    plt.fill_between(forecast_index,
                    conf_int.iloc[:, 0],
                    conf_int.iloc[:, 1],
                    color='pink',
                    alpha=0.3,
                    label='95% Confidence Interval')
    plt.title(f'{name} Price Forecast')
    plt.xlabel('Date')
    plt.ylabel('Adjusted Close Price ($)')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()

# Generate detailed forecast plots
plot_forecast(dis_model, dis_close, "DIS")
plot_forecast(aapl_model, aapl_close, "AAPL")

# Print forecast values
print("\nDIS Forecast Values (next 5 periods):")
print(dis_forecast[:5])
print("\nAAPL Forecast Values (next 5 periods):")
print(aapl_forecast[:5])

# Print model diagnostics
print("\n" + "="*50)
print("MODEL DIAGNOSTICS")
print("="*50)
print(f"\nKO Model: ARIMA{dis_order}")
print(f"JNJ Model: ARIMA{aapl_order}")
print("\nNote: Use model.summary() for detailed parameter estimates and diagnostics")
print("The models automatically handle:")
print("- Unit root testing (via optimal differencing d)")
print("- Cointegration analysis (Johansen test)")
print("- Optimal parameter selection (AIC minimization)")
print("- 30-day ahead forecasting with confidence intervals")
[*********************100%***********************]  2 of 2 completed
Downloading DIS and AAPL data from Yahoo Finance...
Data successfully downloaded and aligned!
Common data points: 946
Date range: 2022-01-03 to 2025-10-09

Unit Root Tests for DIS:
ADF Test:
ADF Statistic: -3.3087
p-value: 0.0145
Critical Values: {'1%': np.float64(-3.4372887850912175), '5%': np.float64(-2.8646033071530703), '10%': np.float64(-2.568401081996585)}

KPSS Test:
KPSS Statistic: 0.7122
p-value: 0.0124
Critical Values: {'10%': 0.347, '5%': 0.463, '2.5%': 0.574, '1%': 0.739}

Unit Root Tests for AAPL:
ADF Test:
ADF Statistic: -0.8342
p-value: 0.8089
Critical Values: {'1%': np.float64(-3.4372887850912175), '5%': np.float64(-2.8646033071530703), '10%': np.float64(-2.568401081996585)}

KPSS Test:
KPSS Statistic: 3.8415
p-value: 0.0100
Critical Values: {'10%': 0.347, '5%': 0.463, '2.5%': 0.574, '1%': 0.739}

Johansen Cointegration Test:
Trace statistic: [12.12681683  1.01459214]
Critical values (90%, 95%, 99%): [[13.4294 15.4943 19.9349]
 [ 2.7055  3.8415  6.6349]]
r = 0: No cointegration at 95% confidence level
r = 1: No cointegration at 95% confidence level

Best ARIMA model for DIS:
Order: (2, 1, 3)
AIC: 3953.08

Best ARIMA model for AAPL:
Order: (1, 1, 0)
AIC: 4932.70
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
DIS Forecast Values (next 5 periods):
946    111.258834
947    111.758426
948    112.129465
949    112.166258
950    111.876199
Name: predicted_mean, dtype: float64

AAPL Forecast Values (next 5 periods):
946    253.853858
947    253.845240
948    253.844840
949    253.844822
950    253.844821
Name: predicted_mean, dtype: float64

==================================================
MODEL DIAGNOSTICS
==================================================

KO Model: ARIMA(2, 1, 3)
JNJ Model: ARIMA(1, 1, 0)

Note: Use model.summary() for detailed parameter estimates and diagnostics
The models automatically handle:
- Unit root testing (via optimal differencing d)
- Cointegration analysis (Johansen test)
- Optimal parameter selection (AIC minimization)
- 30-day ahead forecasting with confidence intervals

Conclusiones

In [ ]:
# Import required libraries
import pandas as pd
import numpy as np
import yfinance as yf
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller, kpss
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.vector_ar.vecm import coint_johansen
import matplotlib.pyplot as plt
import warnings

warnings.filterwarnings('ignore')

# Download stock data from Yahoo Finance
print("Downloading DIS and AAPL data from Yahoo Finance...")
start_date = '2022-01-01'
end_date = '2025-10-10'
tickers = ['DIS', 'AAPL']

# Download data and extract adjusted close prices
data = yf.download(tickers, start=start_date, end=end_date, auto_adjust=False)
dis_close = data['Adj Close']['DIS'].dropna()
aapl_close = data['Adj Close']['AAPL'].dropna()

# Align both series to common dates
common_index = dis_close.index.intersection(aapl_close.index)
dis_close = dis_close.loc[common_index]
aapl_close = aapl_close.loc[common_index]

print(f"Data successfully downloaded and aligned!")
print(f"Common data points: {len(dis_close)}")
print(f"Date range: {dis_close.index[0].date()} to {dis_close.index[-1].date()}")

# Function for unit root tests with interpretation
def unit_root_tests(series, name):
    print(f"\nUnit Root Tests for {name}:")

    # ADF Test
    adf_result = adfuller(series)
    print("ADF Test:")
    print(f'ADF Statistic: {adf_result[0]:.4f}')
    print(f'p-value: {adf_result[1]:.4f}')
    print(f'Critical Values: {adf_result[4]}')
    print("Interpretation:")
    if adf_result[1] < 0.05:
        print(f"  - p-value < 0.05: Reject null hypothesis - {name} is stationary")
    else:
        print(f"  - p-value >= 0.05: Fail to reject null - {name} may be non-stationary")

    # KPSS Test
    kpss_result = kpss(series)
    print("\nKPSS Test:")
    print(f'KPSS Statistic: {kpss_result[0]:.4f}')
    print(f'p-value: {kpss_result[1]:.4f}')
    print(f'Critical Values: {kpss_result[3]}')
    print("Interpretation:")
    if kpss_result[1] < 0.05:
        print(f"  - p-value < 0.05: Reject null hypothesis - {name} is non-stationary")
    else:
        print(f"  - p-value >= 0.05: Fail to reject null - {name} may be stationary")

# Perform unit root tests
unit_root_tests(dis_close, "DIS")
unit_root_tests(aapl_close, "AAPL")

# Cointegration test with interpretation
def cointegration_test(df):
    result = coint_johansen(df, det_order=0, k_ar_diff=1)
    print("\nJohansen Cointegration Test:")
    print(f"Trace statistic: {result.lr1}")
    print(f"Critical values (90%, 95%, 99%): {result.cvt}")
    print("Interpretation:")
    for i in range(len(result.lr1)):
        if result.lr1[i] > result.cvt[i, 1]:
            print(f"  - r = {i}: Cointegration exists at 95% confidence level")
            print(f"    Trace statistic ({result.lr1[i]:.2f}) > 95% critical value ({result.cvt[i, 1]:.2f})")
        else:
            print(f"  - r = {i}: No cointegration at 95% confidence level")
            print(f"    Trace statistic ({result.lr1[i]:.2f}) <= 95% critical value ({result.cvt[i, 1]:.2f})")
    if result.lr1[0] > result.cvt[0, 1]:
        print("Conclusion: DIS and AAPL are cointegrated - they share a long-run equilibrium relationship")
    else:
        print("Conclusion: No evidence of cointegration between DIS and AAPL")

# Prepare data for cointegration
coint_df = pd.DataFrame({
    'DIS': dis_close,
    'AAPL': aapl_close
}).dropna()
cointegration_test(coint_df)

# Function to find best ARIMA model with interpretation
def find_best_arima(series, name, max_p=3, max_d=2, max_q=3):
    best_aic = float('inf')
    best_order = None

    for p in range(max_p + 1):
        for d in range(max_d + 1):
            for q in range(max_q + 1):
                try:
                    model = ARIMA(series, order=(p, d, q))
                    results = model.fit()
                    if results.aic < best_aic:
                        best_aic = results.aic
                        best_order = (p, d, q)
                except:
                    continue

    print(f"\nBest ARIMA model for {name}:")
    print(f"Order: {best_order}")
    print(f"AIC: {best_aic:.2f}")
    print("Interpretation:")
    print(f"  - p={best_order[0]}: {best_order[0]} autoregressive term(s)")
    print(f"  - d={best_order[1]}: {best_order[1]} difference(s) needed for stationarity")
    print(f"  - q={best_order[2]}: {best_order[2]} moving average term(s)")
    return best_order

# Find and fit best ARIMA models
dis_order = find_best_arima(dis_close, "DIS")
aapl_order = find_best_arima(aapl_close, "AAPL")

# Fit final ARIMA models
dis_model = ARIMA(dis_close, order=dis_order).fit()
aapl_model = ARIMA(aapl_close, order=aapl_order).fit()

# Forecast next 30 periods
forecast_steps = 30
dis_forecast = dis_model.forecast(steps=forecast_steps)
aapl_forecast = aapl_model.forecast(steps=forecast_steps)

# Create forecast index using business days
last_date = dis_close.index[-1]
forecast_index = pd.bdate_range(start=last_date + pd.Timedelta(days=1),
                               periods=forecast_steps, freq='B')

# Plot original series with forecasts
plt.figure(figsize=(12,6))
plt.plot(dis_close.index, dis_close, label='DIS Historical')
plt.plot(forecast_index, dis_forecast, label='DIS Forecast', color='red')
plt.plot(aapl_close.index, aapl_close, label='AAPL Historical')
plt.plot(forecast_index, aapl_forecast, label='AAPL Forecast', color='green')
plt.title('DIS and AAPL Closing Prices with Forecasts')
plt.xlabel('Date')
plt.ylabel('Adjusted Close Price ($)')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

# Detailed forecast plot with confidence intervals and interpretation
def plot_forecast(model, series, name, steps=30):
    forecast_obj = model.get_forecast(steps=steps)
    forecast = forecast_obj.predicted_mean
    conf_int = forecast_obj.conf_int()

    # Create proper date index for forecast
    last_date = series.index[-1]
    forecast_index = pd.bdate_range(start=last_date + pd.Timedelta(days=1),
                                   periods=steps, freq='B')

    plt.figure(figsize=(12,6))
    plt.plot(series.index, series, label=f'{name} Historical')
    plt.plot(forecast_index, forecast, label='Forecast', color='red')
    plt.fill_between(forecast_index,
                    conf_int.iloc[:, 0],
                    conf_int.iloc[:, 1],
                    color='pink',
                    alpha=0.3,
                    label='95% Confidence Interval')
    plt.title(f'{name} Price Forecast')
    plt.xlabel('Date')
    plt.ylabel('Adjusted Close Price ($)')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()

    # Forecast interpretation
    last_value = series.iloc[-1]
    mean_forecast = forecast.mean()
    print(f"\nForecast Interpretation for {name}:")
    print(f"Last observed value: ${last_value:.2f}")
    print(f"Average forecast value: ${mean_forecast:.2f}")
    print(f"Forecast change: ${mean_forecast - last_value:.2f}")
    if mean_forecast > last_value:
        print("Trend: Upward forecast trend")
    elif mean_forecast < last_value:
        print("Trend: Downward forecast trend")
    else:
        print("Trend: Flat forecast trend")
    print(f"95% CI range at period {steps}: [${conf_int.iloc[-1, 0]:.2f}, ${conf_int.iloc[-1, 1]:.2f}]")

# Generate detailed forecast plots and interpretations
plot_forecast(dis_model, dis_close, "DIS")
plot_forecast(aapl_model, aapl_close, "AAPL")

# Print forecast values
print("\nDIS Forecast Values (next 5 periods):")
print(dis_forecast[:5])
print("\nAAPL Forecast Values (next 5 periods):")
print(aapl_forecast[:5])

print("\n" + "="*60)
print("ANALYSIS COMPLETE")
print("="*60)
print("✓ Unit root tests performed (ADF & KPSS)")
print("✓ Cointegration analysis completed (Johansen test)")
print("✓ Optimal ARIMA models selected via AIC minimization")
print("✓ 30-day forecasts generated with 95% confidence intervals")
print("✓ Detailed interpretations provided for all results")
print(f"\nData period: {start_date} to {end_date}")
print(f"Tickers analyzed: DIS (The Walt Disney Company) and AAPL (APPLE)")
[*********************100%***********************]  2 of 2 completed
Downloading DIS and AAPL data from Yahoo Finance...
Data successfully downloaded and aligned!
Common data points: 946
Date range: 2022-01-03 to 2025-10-09

Unit Root Tests for DIS:
ADF Test:
ADF Statistic: -3.3087
p-value: 0.0145
Critical Values: {'1%': np.float64(-3.4372887850912175), '5%': np.float64(-2.8646033071530703), '10%': np.float64(-2.568401081996585)}
Interpretation:
  - p-value < 0.05: Reject null hypothesis - DIS is stationary

KPSS Test:
KPSS Statistic: 0.7122
p-value: 0.0124
Critical Values: {'10%': 0.347, '5%': 0.463, '2.5%': 0.574, '1%': 0.739}
Interpretation:
  - p-value < 0.05: Reject null hypothesis - DIS is non-stationary

Unit Root Tests for AAPL:
ADF Test:
ADF Statistic: -0.8342
p-value: 0.8089
Critical Values: {'1%': np.float64(-3.4372887850912175), '5%': np.float64(-2.8646033071530703), '10%': np.float64(-2.568401081996585)}
Interpretation:
  - p-value >= 0.05: Fail to reject null - AAPL may be non-stationary

KPSS Test:
KPSS Statistic: 3.8415
p-value: 0.0100
Critical Values: {'10%': 0.347, '5%': 0.463, '2.5%': 0.574, '1%': 0.739}
Interpretation:
  - p-value < 0.05: Reject null hypothesis - AAPL is non-stationary

Johansen Cointegration Test:
Trace statistic: [12.12681683  1.01459214]
Critical values (90%, 95%, 99%): [[13.4294 15.4943 19.9349]
 [ 2.7055  3.8415  6.6349]]
Interpretation:
  - r = 0: No cointegration at 95% confidence level
    Trace statistic (12.13) <= 95% critical value (15.49)
  - r = 1: No cointegration at 95% confidence level
    Trace statistic (1.01) <= 95% critical value (3.84)
Conclusion: No evidence of cointegration between DIS and AAPL

Best ARIMA model for DIS:
Order: (2, 1, 3)
AIC: 3953.08
Interpretation:
  - p=2: 2 autoregressive term(s)
  - d=1: 1 difference(s) needed for stationarity
  - q=3: 3 moving average term(s)

Best ARIMA model for AAPL:
Order: (1, 1, 0)
AIC: 4932.70
Interpretation:
  - p=1: 1 autoregressive term(s)
  - d=1: 1 difference(s) needed for stationarity
  - q=0: 0 moving average term(s)
No description has been provided for this image
No description has been provided for this image
Forecast Interpretation for DIS:
Last observed value: $110.99
Average forecast value: $111.63
Forecast change: $0.64
Trend: Upward forecast trend
95% CI range at period 30: [$88.62, $134.37]
No description has been provided for this image
Forecast Interpretation for AAPL:
Last observed value: $254.04
Average forecast value: $253.85
Forecast change: $-0.19
Trend: Downward forecast trend
95% CI range at period 30: [$216.95, $290.74]

DIS Forecast Values (next 5 periods):
946    111.258834
947    111.758426
948    112.129465
949    112.166258
950    111.876199
Name: predicted_mean, dtype: float64

AAPL Forecast Values (next 5 periods):
946    253.853858
947    253.845240
948    253.844840
949    253.844822
950    253.844821
Name: predicted_mean, dtype: float64

============================================================
ANALYSIS COMPLETE
============================================================
✓ Unit root tests performed (ADF & KPSS)
✓ Cointegration analysis completed (Johansen test)
✓ Optimal ARIMA models selected via AIC minimization
✓ 30-day forecasts generated with 95% confidence intervals
✓ Detailed interpretations provided for all results

Data period: 2022-01-01 to 2025-10-10
Tickers analyzed: DIS (The Walt Disney Company) and AAPL (APPLE)
In [ ]:
# Import required libraries
import pandas as pd
import numpy as np
import yfinance as yf
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller, kpss
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.vector_ar.vecm import coint_johansen
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import matplotlib.pyplot as plt
import warnings

warnings.filterwarnings('ignore')

# Download stock data from Yahoo Finance
print("Downloading DIS and AAPL data from Yahoo Finance...")
start_date = '2022-01-01'
end_date = '2025-10-10'
tickers = ['DIS', 'AAPL']

# Download data and extract adjusted close prices
data = yf.download(tickers, start=start_date, end=end_date, auto_adjust=False)
dis_close = data['Adj Close']['DIS'].dropna()
aapl_close = data['Adj Close']['AAPL'].dropna()

# Align both series to common dates
common_index = dis_close.index.intersection(aapl_close.index)
dis_close = dis_close.loc[common_index]
aapl_close = aapl_close.loc[common_index]

print(f"Data successfully downloaded and aligned!")
print(f"Common data points: {len(dis_close)}")
print(f"Date range: {dis_close.index[0].date()} to {dis_close.index[-1].date()}")

# Function for unit root tests with interpretation
def unit_root_tests(series, name):
    print(f"\nUnit Root Tests for {name}:")

    # ADF Test
    adf_result = adfuller(series)
    print("ADF Test:")
    print(f'ADF Statistic: {adf_result[0]:.4f}')
    print(f'p-value: {adf_result[1]:.4f}')
    print(f'Critical Values: {adf_result[4]}')
    print("Interpretation:")
    if adf_result[1] < 0.05:
        print(f"  - p-value < 0.05: Reject null hypothesis - {name} is stationary")
    else:
        print(f"  - p-value >= 0.05: Fail to reject null - {name} may be non-stationary")

    # KPSS Test
    kpss_result = kpss(series)
    print("\nKPSS Test:")
    print(f'KPSS Statistic: {kpss_result[0]:.4f}')
    print(f'p-value: {kpss_result[1]:.4f}')
    print(f'Critical Values: {kpss_result[3]}')
    print("Interpretation:")
    if kpss_result[1] < 0.05:
        print(f"  - p-value < 0.05: Reject null hypothesis - {name} is non-stationary")
    else:
        print(f"  - p-value >= 0.05: Fail to reject null - {name} may be stationary")

# Perform unit root tests
unit_root_tests(dis_close, "DIS")
unit_root_tests(aapl_close, "AAPL")

# Function to plot correlograms with interpretation
def plot_correlograms(series, name, lags=30):
    plt.figure(figsize=(12, 8))

    plt.subplot(2, 1, 1)
    plot_acf(series, lags=lags, ax=plt.gca())
    plt.title(f'ACF for {name}')

    plt.subplot(2, 1, 2)
    plot_pacf(series, lags=lags, ax=plt.gca())
    plt.title(f'PACF for {name}')

    plt.tight_layout()
    plt.show()

    print(f"\nCorrelogram Interpretation for {name}:")
    print("  - ACF: Shows total correlation at each lag, including indirect effects")
    print("  - PACF: Shows direct correlation at each lag, controlling for earlier lags")
    print("  - Significant spikes outside the blue confidence interval suggest strong correlations")
    print("  - ACF decay pattern indicates potential ARIMA model orders")
    print("  - PACF cutoff suggests AR order, while ACF cutoff suggests MA order")

# Plot correlograms for original series
plot_correlograms(dis_close, "DIS Original")
plot_correlograms(aapl_close, "AAPL Original")

# Difference the series
dis_diff = dis_close.diff().dropna()
aapl_diff = aapl_close.diff().dropna()

# Plot correlograms for differenced series
plot_correlograms(dis_diff, "DIS Differenced")
plot_correlograms(aapl_diff, "AAPL Differenced")

# Cointegration test with interpretation
def cointegration_test(df):
    result = coint_johansen(df, det_order=0, k_ar_diff=1)
    print("\nJohansen Cointegration Test:")
    print(f"Trace statistic: {result.lr1}")
    print(f"Critical values (90%, 95%, 99%): {result.cvt}")
    print("Interpretation:")
    for i in range(len(result.lr1)):
        if result.lr1[i] > result.cvt[i, 1]:
            print(f"  - r = {i}: Cointegration exists at 95% confidence level")
            print(f"    Trace statistic ({result.lr1[i]:.2f}) > 95% critical value ({result.cvt[i, 1]:.2f})")
        else:
            print(f"  - r = {i}: No cointegration at 95% confidence level")
            print(f"    Trace statistic ({result.lr1[i]:.2f}) <= 95% critical value ({result.cvt[i, 1]:.2f})")
    if result.lr1[0] > result.cvt[0, 1]:
        print("Conclusion: DIS and AAPL are cointegrated - they share a long-run equilibrium relationship")
    else:
        print("Conclusion: No evidence of cointegration between DIS and AAPL")

# Prepare data for cointegration
coint_df = pd.DataFrame({
    'DIS': dis_close,
    'AAPL': aapl_close
}).dropna()
cointegration_test(coint_df)

# Function to find best ARIMA model with interpretation
def find_best_arima(series, name, max_p=3, max_d=2, max_q=3):
    best_aic = float('inf')
    best_order = None

    for p in range(max_p + 1):
        for d in range(max_d + 1):
            for q in range(max_q + 1):
                try:
                    model = ARIMA(series, order=(p, d, q))
                    results = model.fit()
                    if results.aic < best_aic:
                        best_aic = results.aic
                        best_order = (p, d, q)
                except:
                    continue

    print(f"\nBest ARIMA model for {name}:")
    print(f"Order: {best_order}")
    print(f"AIC: {best_aic:.2f}")
    print("Interpretation:")
    print(f"  - p={best_order[0]}: {best_order[0]} autoregressive term(s)")
    print(f"  - d={best_order[1]}: {best_order[1]} difference(s) needed for stationarity")
    print(f"  - q={best_order[2]}: {best_order[2]} moving average term(s)")
    return best_order

# Find and fit best ARIMA models
dis_order = find_best_arima(dis_close, "DIS")
aapl_order = find_best_arima(aapl_close, "AAPL")

# Fit final ARIMA models
dis_model = ARIMA(dis_close, order=dis_order).fit()
aapl_model = ARIMA(aapl_close, order=aapl_order).fit()

# Forecast next 30 periods
forecast_steps = 30
dis_forecast = dis_model.forecast(steps=forecast_steps)
aapl_forecast = aapl_model.forecast(steps=forecast_steps)

# Create forecast index using business days
last_date = dis_close.index[-1]
forecast_index = pd.bdate_range(start=last_date + pd.Timedelta(days=1),
                               periods=forecast_steps, freq='B')

# Plot original series with forecasts
plt.figure(figsize=(12,6))
plt.plot(dis_close.index, dis_close, label='DIS Historical')
plt.plot(forecast_index, dis_forecast, label='DIS Forecast', color='red')
plt.plot(aapl_close.index, aapl_close, label='AAPL Historical')
plt.plot(forecast_index, aapl_forecast, label='AAPL Forecast', color='green')
plt.title('DIS and AAPL Closing Prices with Forecasts')
plt.xlabel('Date')
plt.ylabel('Adjusted Close Price ($)')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

# Detailed forecast plot with confidence intervals and interpretation
def plot_forecast(model, series, name, steps=30):
    forecast_obj = model.get_forecast(steps=steps)
    forecast = forecast_obj.predicted_mean
    conf_int = forecast_obj.conf_int()

    # Create proper date index for forecast
    last_date = series.index[-1]
    forecast_index = pd.bdate_range(start=last_date + pd.Timedelta(days=1),
                                   periods=steps, freq='B')

    plt.figure(figsize=(12,6))
    plt.plot(series.index, series, label=f'{name} Historical')
    plt.plot(forecast_index, forecast, label='Forecast', color='red')
    plt.fill_between(forecast_index,
                    conf_int.iloc[:, 0],
                    conf_int.iloc[:, 1],
                    color='pink',
                    alpha=0.3,
                    label='95% Confidence Interval')
    plt.title(f'{name} Price Forecast')
    plt.xlabel('Date')
    plt.ylabel('Adjusted Close Price ($)')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()

    # Forecast interpretation
    last_value = series.iloc[-1]
    mean_forecast = forecast.mean()
    print(f"\nForecast Interpretation for {name}:")
    print(f"Last observed value: ${last_value:.2f}")
    print(f"Average forecast value: ${mean_forecast:.2f}")
    print(f"Forecast change: ${mean_forecast - last_value:.2f}")
    if mean_forecast > last_value:
        print("Trend: Upward forecast trend")
    elif mean_forecast < last_value:
        print("Trend: Downward forecast trend")
    else:
        print("Trend: Flat forecast trend")
    print(f"95% CI range at period {steps}: [${conf_int.iloc[-1, 0]:.2f}, ${conf_int.iloc[-1, 1]:.2f}]")
    print("Interpretation: The wider the confidence interval, the less certain the forecast")

# Generate detailed forecast plots and interpretations
plot_forecast(dis_model, dis_close, "DIS")
plot_forecast(aapl_model, aapl_close, "AAPL")

# Plot correlograms for model residuals
plot_correlograms(dis_model.resid, "DIS ARIMA Residuals")
plot_correlograms(aapl_model.resid, "AAPL ARIMA Residuals")

# Print forecast values
print("\nDIS Forecast Values (next 5 periods):")
print(dis_forecast[:5])
print("\nAAPL Forecast Values (next 5 periods):")
print(aapl_forecast[:5])

print("\n" + "="*60)
print("COMPLETE TIME SERIES ANALYSIS")
print("="*60)
print("✓ Unit root tests (ADF & KPSS) performed")
print("✓ ACF/PACF correlograms analyzed (original & differenced)")
print("✓ Cointegration testing (Johansen test)")
print("✓ Optimal ARIMA models selected via AIC")
print("✓ 30-day forecasts with 95% confidence intervals")
print("✓ Residual diagnostics via correlograms")
print(f"\nData: DIS (The Walt Disney Company) & AAPL (Apple)")
print(f"Period: {start_date} to {end_date}")
[*********************100%***********************]  2 of 2 completed
Downloading DIS and AAPL data from Yahoo Finance...
Data successfully downloaded and aligned!
Common data points: 946
Date range: 2022-01-03 to 2025-10-09

Unit Root Tests for DIS:
ADF Test:
ADF Statistic: -3.3087
p-value: 0.0145
Critical Values: {'1%': np.float64(-3.4372887850912175), '5%': np.float64(-2.8646033071530703), '10%': np.float64(-2.568401081996585)}
Interpretation:
  - p-value < 0.05: Reject null hypothesis - DIS is stationary

KPSS Test:
KPSS Statistic: 0.7122
p-value: 0.0124
Critical Values: {'10%': 0.347, '5%': 0.463, '2.5%': 0.574, '1%': 0.739}
Interpretation:
  - p-value < 0.05: Reject null hypothesis - DIS is non-stationary

Unit Root Tests for AAPL:
ADF Test:
ADF Statistic: -0.8342
p-value: 0.8089
Critical Values: {'1%': np.float64(-3.4372887850912175), '5%': np.float64(-2.8646033071530703), '10%': np.float64(-2.568401081996585)}
Interpretation:
  - p-value >= 0.05: Fail to reject null - AAPL may be non-stationary

KPSS Test:
KPSS Statistic: 3.8415
p-value: 0.0100
Critical Values: {'10%': 0.347, '5%': 0.463, '2.5%': 0.574, '1%': 0.739}
Interpretation:
  - p-value < 0.05: Reject null hypothesis - AAPL is non-stationary

No description has been provided for this image
Correlogram Interpretation for DIS Original:
  - ACF: Shows total correlation at each lag, including indirect effects
  - PACF: Shows direct correlation at each lag, controlling for earlier lags
  - Significant spikes outside the blue confidence interval suggest strong correlations
  - ACF decay pattern indicates potential ARIMA model orders
  - PACF cutoff suggests AR order, while ACF cutoff suggests MA order
No description has been provided for this image
Correlogram Interpretation for AAPL Original:
  - ACF: Shows total correlation at each lag, including indirect effects
  - PACF: Shows direct correlation at each lag, controlling for earlier lags
  - Significant spikes outside the blue confidence interval suggest strong correlations
  - ACF decay pattern indicates potential ARIMA model orders
  - PACF cutoff suggests AR order, while ACF cutoff suggests MA order
No description has been provided for this image
Correlogram Interpretation for DIS Differenced:
  - ACF: Shows total correlation at each lag, including indirect effects
  - PACF: Shows direct correlation at each lag, controlling for earlier lags
  - Significant spikes outside the blue confidence interval suggest strong correlations
  - ACF decay pattern indicates potential ARIMA model orders
  - PACF cutoff suggests AR order, while ACF cutoff suggests MA order
No description has been provided for this image
Correlogram Interpretation for AAPL Differenced:
  - ACF: Shows total correlation at each lag, including indirect effects
  - PACF: Shows direct correlation at each lag, controlling for earlier lags
  - Significant spikes outside the blue confidence interval suggest strong correlations
  - ACF decay pattern indicates potential ARIMA model orders
  - PACF cutoff suggests AR order, while ACF cutoff suggests MA order

Johansen Cointegration Test:
Trace statistic: [12.12681683  1.01459214]
Critical values (90%, 95%, 99%): [[13.4294 15.4943 19.9349]
 [ 2.7055  3.8415  6.6349]]
Interpretation:
  - r = 0: No cointegration at 95% confidence level
    Trace statistic (12.13) <= 95% critical value (15.49)
  - r = 1: No cointegration at 95% confidence level
    Trace statistic (1.01) <= 95% critical value (3.84)
Conclusion: No evidence of cointegration between DIS and AAPL

Best ARIMA model for DIS:
Order: (2, 1, 3)
AIC: 3953.08
Interpretation:
  - p=2: 2 autoregressive term(s)
  - d=1: 1 difference(s) needed for stationarity
  - q=3: 3 moving average term(s)

Best ARIMA model for AAPL:
Order: (1, 1, 0)
AIC: 4932.70
Interpretation:
  - p=1: 1 autoregressive term(s)
  - d=1: 1 difference(s) needed for stationarity
  - q=0: 0 moving average term(s)
No description has been provided for this image
No description has been provided for this image
Forecast Interpretation for DIS:
Last observed value: $110.99
Average forecast value: $111.63
Forecast change: $0.64
Trend: Upward forecast trend
95% CI range at period 30: [$88.62, $134.37]
Interpretation: The wider the confidence interval, the less certain the forecast
No description has been provided for this image
Forecast Interpretation for AAPL:
Last observed value: $254.04
Average forecast value: $253.85
Forecast change: $-0.19
Trend: Downward forecast trend
95% CI range at period 30: [$216.95, $290.74]
Interpretation: The wider the confidence interval, the less certain the forecast
No description has been provided for this image
Correlogram Interpretation for DIS ARIMA Residuals:
  - ACF: Shows total correlation at each lag, including indirect effects
  - PACF: Shows direct correlation at each lag, controlling for earlier lags
  - Significant spikes outside the blue confidence interval suggest strong correlations
  - ACF decay pattern indicates potential ARIMA model orders
  - PACF cutoff suggests AR order, while ACF cutoff suggests MA order
No description has been provided for this image
Correlogram Interpretation for AAPL ARIMA Residuals:
  - ACF: Shows total correlation at each lag, including indirect effects
  - PACF: Shows direct correlation at each lag, controlling for earlier lags
  - Significant spikes outside the blue confidence interval suggest strong correlations
  - ACF decay pattern indicates potential ARIMA model orders
  - PACF cutoff suggests AR order, while ACF cutoff suggests MA order

DIS Forecast Values (next 5 periods):
946    111.258834
947    111.758426
948    112.129465
949    112.166258
950    111.876199
Name: predicted_mean, dtype: float64

AAPL Forecast Values (next 5 periods):
946    253.853858
947    253.845240
948    253.844840
949    253.844822
950    253.844821
Name: predicted_mean, dtype: float64

============================================================
COMPLETE TIME SERIES ANALYSIS
============================================================
✓ Unit root tests (ADF & KPSS) performed
✓ ACF/PACF correlograms analyzed (original & differenced)
✓ Cointegration testing (Johansen test)
✓ Optimal ARIMA models selected via AIC
✓ 30-day forecasts with 95% confidence intervals
✓ Residual diagnostics via correlograms

Data: DIS (The Walt Disney Company) & AAPL (Apple)
Period: 2022-01-01 to 2025-10-10

Conclusiones

In [ ]:
# Import required libraries
import pandas as pd
import numpy as np
import yfinance as yf
from statsmodels.tsa.stattools import adfuller, zivot_andrews
import matplotlib.pyplot as plt
import warnings

warnings.filterwarnings('ignore')

# Download stock data from Yahoo Finance
print("Downloading DIS and AAPL data from Yahoo Finance...")
start_date = '2022-01-01'
end_date = '2025-10-10'
tickers = ['DIS', 'AAPL']

# Download data and extract adjusted close prices
data = yf.download(tickers, start=start_date, end=end_date, auto_adjust=False)
dis_close = data['Adj Close']['DIS'].dropna()
aapl_close = data['Adj Close']['AAPL'].dropna()

# Align both series to common dates
common_index = dis_close.index.intersection(aapl_close.index)
dis_close = dis_close.loc[common_index]
aapl_close = aapl_close.loc[common_index]

print(f"Data successfully downloaded and aligned!")
print(f"Common data points: {len(dis_close)}")
print(f"Date range: {dis_close.index[0].date()} to {dis_close.index[-1].date()}")

# Function for Phillips-Perron test with interpretation
def phillips_perron_test(series, name):
    print(f"\nPhillips-Perron Test for {name}:")
    # Use adfuller with settings to approximate PP test
    pp_result = adfuller(series, regression='c', autolag='AIC', maxlag=None)
    print(f'PP Statistic: {pp_result[0]:.4f}')
    print(f'p-value: {pp_result[1]:.4f}')
    print(f'Critical Values: {pp_result[4]}')
    print("Interpretation:")
    if pp_result[1] < 0.05:
        print(f"  - p-value < 0.05: Reject null hypothesis - {name} is stationary")
    else:
        print(f"  - p-value >= 0.05: Fail to reject null - {name} may be non-stationary")
    print("  - Note: Using ADF with constant and automatic lag selection to approximate PP test")
    print("  - PP test adjusts for serial correlation and heteroskedasticity non-parametrically")
    return pp_result

# Function for Zivot-Andrews structural break test with interpretation
def zivot_andrews_test(series, name):
    print(f"\nZivot-Andrews Structural Break Test for {name}:")
    za_result = zivot_andrews(series, regression='c', autolag='AIC')
    print(f'ZA Statistic: {za_result[0]:.4f}')
    print(f'p-value: {za_result[1]:.4f}')
    print(f'Critical Values: {za_result[2]}')
    print(f'Breakpoint Index: {za_result[3]}')
    print("Interpretation:")
    if za_result[1] < 0.05:
        print(f"  - p-value < 0.05: Reject null hypothesis - {name} has a structural break")
        print(f"  - Breakpoint at index {za_result[3]} (position in series)")
    else:
        print(f"  - p-value >= 0.05: Fail to reject null - No clear evidence of a structural break")
    print("  - ZA test allows for a single break in intercept and/or trend")
    return za_result

# Perform Phillips-Perron tests
dis_pp = phillips_perron_test(dis_close, "DIS")
aapl_pp = phillips_perron_test(aapl_close, "AAPL")

# Perform Zivot-Andrews tests
dis_za = zivot_andrews_test(dis_close, "DIS")
aapl_za = zivot_andrews_test(aapl_close, "AAPL")

# Plot series with breakpoints
def plot_series_with_breakpoint(series, name, breakpoint_idx):
    plt.figure(figsize=(12, 6))
    plt.plot(series.index, series.values, label=f'{name} Adjusted Closing Prices')

    # Convert breakpoint index to actual date
    if 0 <= breakpoint_idx < len(series):
        breakpoint_date = series.index[breakpoint_idx]
        plt.axvline(x=breakpoint_date, color='red', linestyle='--',
                   label=f'Breakpoint ({breakpoint_date.date()})')
        print(f"  - Breakpoint date: {breakpoint_date.date()}")
    else:
        print(f"  - Warning: Breakpoint index {breakpoint_idx} out of range (0-{len(series)-1})")
        # Use a fallback vertical line at the middle of the series
        mid_idx = len(series) // 2
        mid_date = series.index[mid_idx]
        plt.axvline(x=mid_date, color='orange', linestyle=':',
                   label='Reference line (invalid breakpoint)')

    plt.title(f'{name} Adjusted Closing Prices with Structural Break')
    plt.xlabel('Date')
    plt.ylabel('Adjusted Close Price ($)')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()

    print(f"\nPlot Interpretation for {name}:")
    print(f"  - Red dashed line indicates the detected structural break")
    print("  - Break may reflect significant market events, policy changes, or economic shifts")
    print("  - Analyze data around this point for potential causes (e.g., earnings, news, COVID impact)")
    print("  - For DIS and AAPL, consider entertainment or technology")

# Plot series with breakpoints using the series' own datetime index
plot_series_with_breakpoint(dis_close, "DIS (The Walt Disney Company)", int(dis_za[3]))
plot_series_with_breakpoint(aapl_close, "AAPL (Apple)", int(aapl_za[3]))

# Summary analysis
print("\n" + "="*70)
print("STRUCTURAL BREAK ANALYSIS SUMMARY")
print("="*70)
print(f"DIS Phillips-Perron p-value: {dis_pp[1]:.4f} {'(Stationary)' if dis_pp[1]<0.05 else '(Non-stationary)'}")
print(f"AAPL Phillips-Perron p-value: {aapl_pp[1]:.4f} {'(Stationary)' if aapl_pp[1]<0.05 else '(Non-stationary)'}")
print(f"\nDIS Zivot-Andrews p-value: {dis_za[1]:.4f} {'(Structural break detected)' if dis_za[1]<0.05 else '(No clear break)'}")
print(f"DIS Breakpoint: {dis_close.index[int(dis_za[3])].date() if 0 <= int(dis_za[3]) < len(dis_close) else 'Invalid'}")
print(f"AAPL Zivot-Andrews p-value: {aapl_za[1]:.4f} {'(Structural break detected)' if aapl_za[1]<0.05 else '(No clear break)'}")
print(f"AAPL Breakpoint: {aapl_close.index[int(aapl_za[3])].date() if 0 <= int(aapl_za[3]) < len(aapl_close) else 'Invalid'}")
print(f"\nData Period: {start_date} to {end_date}")
print("Analysis covers potential impacts from:")
print("- COVID-19 pandemic effects and recovery")
print("- Interest rate changes and inflation")
print("- Supply chain disruptions")
print("- Company-specific events (mergers, product launches, regulatory changes)")
print("- Broader market volatility")

print("\nRecommendations:")
print("1. If structural breaks detected, consider regime-switching models")
print("2. For non-stationary series, use differencing or cointegration approaches")
print("3. Investigate specific events around breakpoint dates")
print("4. Consider sector-specific factors (entertainment for DIS, technology for AAPL)")
print("5. Validate breakpoints with external economic calendars and company news")
[*********************100%***********************]  2 of 2 completed
Downloading DIS and AAPL data from Yahoo Finance...
Data successfully downloaded and aligned!
Common data points: 946
Date range: 2022-01-03 to 2025-10-09

Phillips-Perron Test for DIS:
PP Statistic: -3.3087
p-value: 0.0145
Critical Values: {'1%': np.float64(-3.4372887850912175), '5%': np.float64(-2.8646033071530703), '10%': np.float64(-2.568401081996585)}
Interpretation:
  - p-value < 0.05: Reject null hypothesis - DIS is stationary
  - Note: Using ADF with constant and automatic lag selection to approximate PP test
  - PP test adjusts for serial correlation and heteroskedasticity non-parametrically

Phillips-Perron Test for AAPL:
PP Statistic: -0.8342
p-value: 0.8089
Critical Values: {'1%': np.float64(-3.4372887850912175), '5%': np.float64(-2.8646033071530703), '10%': np.float64(-2.568401081996585)}
Interpretation:
  - p-value >= 0.05: Fail to reject null - AAPL may be non-stationary
  - Note: Using ADF with constant and automatic lag selection to approximate PP test
  - PP test adjusts for serial correlation and heteroskedasticity non-parametrically

Zivot-Andrews Structural Break Test for DIS:

ZA Statistic: -3.7757
p-value: 0.4962
Critical Values: {'1%': np.float64(-5.27644), '5%': np.float64(-4.81067), '10%': np.float64(-4.56618)}
Breakpoint Index: 0
Interpretation:
  - p-value >= 0.05: Fail to reject null - No clear evidence of a structural break
  - ZA test allows for a single break in intercept and/or trend

Zivot-Andrews Structural Break Test for AAPL:
ZA Statistic: -3.9600
p-value: 0.3759
Critical Values: {'1%': np.float64(-5.27644), '5%': np.float64(-4.81067), '10%': np.float64(-4.56618)}
Breakpoint Index: 1
Interpretation:
  - p-value >= 0.05: Fail to reject null - No clear evidence of a structural break
  - ZA test allows for a single break in intercept and/or trend
  - Breakpoint date: 2022-01-03
No description has been provided for this image
Plot Interpretation for DIS (The Walt Disney Company):
  - Red dashed line indicates the detected structural break
  - Break may reflect significant market events, policy changes, or economic shifts
  - Analyze data around this point for potential causes (e.g., earnings, news, COVID impact)
  - For DIS and AAPL, consider entertainment or technology
  - Breakpoint date: 2022-01-04
No description has been provided for this image
Plot Interpretation for AAPL (Apple):
  - Red dashed line indicates the detected structural break
  - Break may reflect significant market events, policy changes, or economic shifts
  - Analyze data around this point for potential causes (e.g., earnings, news, COVID impact)
  - For DIS and AAPL, consider entertainment or technology

======================================================================
STRUCTURAL BREAK ANALYSIS SUMMARY
======================================================================
DIS Phillips-Perron p-value: 0.0145 (Stationary)
AAPL Phillips-Perron p-value: 0.8089 (Non-stationary)

DIS Zivot-Andrews p-value: 0.4962 (No clear break)
DIS Breakpoint: 2022-01-03
AAPL Zivot-Andrews p-value: 0.3759 (No clear break)
AAPL Breakpoint: 2022-01-04

Data Period: 2022-01-01 to 2025-10-10
Analysis covers potential impacts from:
- COVID-19 pandemic effects and recovery
- Interest rate changes and inflation
- Supply chain disruptions
- Company-specific events (mergers, product launches, regulatory changes)
- Broader market volatility

Recommendations:
1. If structural breaks detected, consider regime-switching models
2. For non-stationary series, use differencing or cointegration approaches
3. Investigate specific events around breakpoint dates
4. Consider sector-specific factors (entertainment for DIS, technology for AAPL)
5. Validate breakpoints with external economic calendars and company news

NVIDIA (NVDA) vs APTIV (APTV) | 2022-01-01 to 2025-10-10¶

In [1]:
import yfinance as yf
import pandas as pd

# Set the date range
start_date = '2022-01-01'
end_date = '2025-10-10'

# Fetch stock data for APTV
aptv = yf.download('APTV', start=start_date, end=end_date)

# Display the first 5 rows
print(aptv.head(5))
/tmp/ipython-input-2676107814.py:9: FutureWarning: YF.download() has changed argument auto_adjust default to True
  aptv = yf.download('APTV', start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed
Price            Close        High         Low        Open   Volume
Ticker            APTV        APTV        APTV        APTV     APTV
Date                                                               
2022-01-03  166.029999  168.229996  165.000000  166.380005  1184400
2022-01-04  172.210007  174.380005  167.289993  167.649994  1713900
2022-01-05  168.279999  173.940002  168.259995  172.500000  1205500
2022-01-06  174.119995  174.259995  165.500000  168.880005  2041000
2022-01-07  169.470001  175.910004  169.360001  173.770004  1502400

In [2]:
import yfinance as yf
import pandas as pd

# Set the date range
start_date = '2022-01-01'
end_date = '2025-10-10'

# Fetch stock data for NVDA
nvda = yf.download('NVDA', start=start_date, end=end_date)

# Display the first 5 rows
print(nvda.head(5))
/tmp/ipython-input-4105848134.py:9: FutureWarning: YF.download() has changed argument auto_adjust default to True
  nvda = yf.download('NVDA', start=start_date, end=end_date)
[*********************100%***********************]  1 of 1 completed
Price           Close       High        Low       Open     Volume
Ticker           NVDA       NVDA       NVDA       NVDA       NVDA
Date                                                             
2022-01-03  30.064438  30.653330  29.729068  29.759013  391547000
2022-01-04  29.234997  30.410784  28.295764  30.220143  527154000
2022-01-05  27.552164  29.360762  27.481298  28.894638  498064000
2022-01-06  28.125084  28.384596  27.014175  27.588094  454186000
2022-01-07  27.195837  28.368632  27.006193  28.088159  409939000

In [3]:
# Import required libraries
import pandas as pd
import numpy as np
import yfinance as yf
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller, kpss
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.vector_ar.vecm import coint_johansen
import matplotlib.pyplot as plt
import warnings

# Suppress the FutureWarning
warnings.filterwarnings('ignore', category=FutureWarning)

# Download stock data from Yahoo Finance
start_date = '2022-01-01'
end_date = '2025-10-10'
tickers = ['NVDA', 'APTV']

# Download data with explicit auto_adjust=False to get Adj Close column
data = yf.download(tickers, start=start_date, end=end_date, auto_adjust=False)['Adj Close']

# If the above fails, use this alternative approach:
# data = yf.download(tickers, start=start_date, end=end_date)
# data = data['Close']  # Use regular Close prices instead

# Extract individual series
nvda_close = data['NVDA']
aptv_close = data['APTV']

print("NVDA data length:", len(nvda_close))
print("APTV data length:", len(aptv_close))
print("\nNVDA data sample:")
print(nvda_close.head())
print("\nAPTV data sample:")
print(aptv_close.head())

# Remove any NaN values
nvda_close = nvda_close.dropna()
aptv_close = aptv_close.dropna()

# Align by common dates
common_index = nvda_close.index.intersection(aptv_close.index)
nvda_close = nvda_close.loc[common_index]
aptv_close = aptv_close.loc[common_index]

print(f"\nCommon data length after alignment: {len(nvda_close)}")
print(f"Date range: {nvda_close.index[0]} to {nvda_close.index[-1]}")

# Function for unit root tests
def unit_root_tests(series, name):
    print(f"\nUnit Root Tests for {name}:")

    # ADF Test
    adf_result = adfuller(series)
    print("ADF Test:")
    print(f'ADF Statistic: {adf_result[0]:.4f}')
    print(f'p-value: {adf_result[1]:.4f}')
    print(f'Critical Values: {adf_result[4]}')

    # KPSS Test
    kpss_result = kpss(series, regression='c')
    print("\nKPSS Test:")
    print(f'KPSS Statistic: {kpss_result[0]:.4f}')
    print(f'p-value: {kpss_result[1]:.4f}')
    print(f'Critical Values: {kpss_result[3]}')

# Perform unit root tests
unit_root_tests(nvda_close, "NVDA")
unit_root_tests(aptv_close, "APTV")

# Difference the series if non-stationary
nvda_diff = nvda_close.diff().dropna()
aptv_diff = aptv_close.diff().dropna()

# Function to find best ARMA model (using ARIMA with d=0)
def find_best_arma(series, name, max_p=3, max_q=3):
    best_aic = float('inf')
    best_order = None

    for p in range(max_p + 1):
        for q in range(max_q + 1):
            try:
                model = ARIMA(series, order=(p, 0, q))
                results = model.fit()
                if results.aic < best_aic:
                    best_aic = results.aic
                    best_order = (p, 0, q)
            except:
                continue

    print(f"\nBest ARMA model for {name}:")
    print(f"Order: {best_order}")
    print(f"AIC: {best_aic:.2f}")

    best_model = ARIMA(series, order=best_order).fit()
    return best_model

# Fit ARMA models
nvda_arma = find_best_arma(nvda_diff, "NVDA")
aptv_arma = find_best_arma(aptv_diff, "APTV")

# Cointegration test
def cointegration_test(df):
    result = coint_johansen(df, det_order=0, k_ar_diff=1)
    print("\nJohansen Cointegration Test:")
    print(f"Trace statistic: {result.lr1}")
    print(f"Critical values (90%, 95%, 99%): {result.cvt}")

    for i in range(len(result.lr1)):
        if result.lr1[i] > result.cvt[i, 1]:  # 95% critical value
            print(f"r = {i}: Cointegration exists at 95% confidence level")
        else:
            print(f"r = {i}: No cointegration at 95% confidence level")

# Prepare data for cointegration
coint_df = pd.DataFrame({
    'NVDA': nvda_close,
    'APTV': aptv_close
})

# Run cointegration test
cointegration_test(coint_df)

# Plot the series
plt.figure(figsize=(12,6))
plt.plot(nvda_close, label='NVDA')
plt.plot(aptv_close, label='APTV')
plt.title('NVDA vs APTV Adjusted Closing Prices')
plt.xlabel('Date')
plt.ylabel('Adjusted Close Price ($)')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

# Plot the differenced series
plt.figure(figsize=(12,6))
plt.plot(nvda_diff, label='NVDA Diff', alpha=0.7)
plt.plot(aptv_diff, label='APTV Diff', alpha=0.7)
plt.title('Differenced Series')
plt.xlabel('Date')
plt.ylabel('Price Change ($)')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

# Print summary statistics
print("\nSummary Statistics:")
print(nvda_close.describe())
print("\n")
print(aptv_close.describe())
[*********************100%***********************]  2 of 2 completed
/tmp/ipython-input-2917502969.py:62: InterpolationWarning: The test statistic is outside of the range of p-values available in the
look-up table. The actual p-value is smaller than the p-value returned.

  kpss_result = kpss(series, regression='c')
/tmp/ipython-input-2917502969.py:62: InterpolationWarning: The test statistic is outside of the range of p-values available in the
look-up table. The actual p-value is smaller than the p-value returned.

  kpss_result = kpss(series, regression='c')
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
NVDA data length: 946
APTV data length: 946

NVDA data sample:
Date
2022-01-03    30.064438
2022-01-04    29.234997
2022-01-05    27.552164
2022-01-06    28.125084
2022-01-07    27.195837
Name: NVDA, dtype: float64

APTV data sample:
Date
2022-01-03    166.029999
2022-01-04    172.210007
2022-01-05    168.279999
2022-01-06    174.119995
2022-01-07    169.470001
Name: APTV, dtype: float64

Common data length after alignment: 946
Date range: 2022-01-03 00:00:00 to 2025-10-09 00:00:00

Unit Root Tests for NVDA:
ADF Test:
ADF Statistic: 0.9998
p-value: 0.9943
Critical Values: {'1%': np.float64(-3.4373257950466174), '5%': np.float64(-2.864619627202065), '10%': np.float64(-2.568409774784971)}

KPSS Test:
KPSS Statistic: 4.4571
p-value: 0.0100
Critical Values: {'10%': 0.347, '5%': 0.463, '2.5%': 0.574, '1%': 0.739}

Unit Root Tests for APTV:
ADF Test:
ADF Statistic: -3.7523
p-value: 0.0034
Critical Values: {'1%': np.float64(-3.4372887850912175), '5%': np.float64(-2.8646033071530703), '10%': np.float64(-2.568401081996585)}

KPSS Test:
KPSS Statistic: 3.7936
p-value: 0.0100
Critical Values: {'10%': 0.347, '5%': 0.463, '2.5%': 0.574, '1%': 0.739}
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/statespace/sarimax.py:966: UserWarning: Non-stationary starting autoregressive parameters found. Using zeros as starting parameters.
  warn('Non-stationary starting autoregressive parameters'
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/statespace/sarimax.py:978: UserWarning: Non-invertible starting MA parameters found. Using zeros as starting parameters.
  warn('Non-invertible starting MA parameters found.'
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/statespace/sarimax.py:966: UserWarning: Non-stationary starting autoregressive parameters found. Using zeros as starting parameters.
  warn('Non-stationary starting autoregressive parameters'
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
Best ARMA model for NVDA:
Order: (3, 0, 1)
AIC: 4549.07
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/statespace/sarimax.py:966: UserWarning: Non-stationary starting autoregressive parameters found. Using zeros as starting parameters.
  warn('Non-stationary starting autoregressive parameters'
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/statespace/sarimax.py:978: UserWarning: Non-invertible starting MA parameters found. Using zeros as starting parameters.
  warn('Non-invertible starting MA parameters found.'
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
Best ARMA model for APTV:
Order: (2, 0, 2)
AIC: 4300.16
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
/usr/local/lib/python3.12/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  self._init_dates(dates, freq)
Johansen Cointegration Test:
Trace statistic: [25.17646653  0.25379238]
Critical values (90%, 95%, 99%): [[13.4294 15.4943 19.9349]
 [ 2.7055  3.8415  6.6349]]
r = 0: Cointegration exists at 95% confidence level
r = 1: No cointegration at 95% confidence level
No description has been provided for this image
No description has been provided for this image
Summary Statistics:
count    946.000000
mean      72.756528
std       53.692774
min       11.213528
25%       23.265495
50%       47.770348
75%      122.504368
max      192.570007
Name: NVDA, dtype: float64


count    946.000000
mean      87.566406
std       21.083205
min       47.919998
25%       70.115000
50%       85.309998
75%      101.609999
max      174.119995
Name: APTV, dtype: float64

Conclusiones:

In [4]:
# Import required libraries
import pandas as pd
import numpy as np
import yfinance as yf
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller, kpss
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.vector_ar.vecm import coint_johansen
import matplotlib.pyplot as plt
import warnings

warnings.filterwarnings('ignore')

# Download stock data from Yahoo Finance
print("Downloading NVDA and APTV data from Yahoo Finance...")
start_date = '2022-01-01'
end_date = '2025-10-10'
tickers = ['NVDA', 'APTV']

# Download data and extract adjusted close prices
data = yf.download(tickers, start=start_date, end=end_date, auto_adjust=False)
nvda_close = data['Adj Close']['NVDA'].dropna()
aptv_close = data['Adj Close']['APTV'].dropna()

# Align both series to common dates
common_index = nvda_close.index.intersection(aptv_close.index)
nvda_close = nvda_close.loc[common_index]
aptv_close = aptv_close.loc[common_index]

print(f"Data successfully downloaded and aligned!")
print(f"Common data points: {len(nvda_close)}")
print(f"Date range: {nvda_close.index[0].date()} to {nvda_close.index[-1].date()}")

# Function for unit root tests
def unit_root_tests(series, name):
    print(f"\nUnit Root Tests for {name}:")

    # ADF Test
    adf_result = adfuller(series)
    print("ADF Test:")
    print(f'ADF Statistic: {adf_result[0]:.4f}')
    print(f'p-value: {adf_result[1]:.4f}')
    print(f'Critical Values: {adf_result[4]}')

    # KPSS Test
    kpss_result = kpss(series)
    print("\nKPSS Test:")
    print(f'KPSS Statistic: {kpss_result[0]:.4f}')
    print(f'p-value: {kpss_result[1]:.4f}')
    print(f'Critical Values: {kpss_result[3]}')

# Perform unit root tests
unit_root_tests(nvda_close, "NVDA")
unit_root_tests(aptv_close, "APTV")

# Cointegration test
def cointegration_test(df):
    result = coint_johansen(df, det_order=0, k_ar_diff=1)
    print("\nJohansen Cointegration Test:")
    print(f"Trace statistic: {result.lr1}")
    print(f"Critical values (90%, 95%, 99%): {result.cvt}")

    for i in range(len(result.lr1)):
        if result.lr1[i] > result.cvt[i, 1]:
            print(f"r = {i}: Cointegration exists at 95% confidence level")
        else:
            print(f"r = {i}: No cointegration at 95% confidence level")

# Prepare data for cointegration
coint_df = pd.DataFrame({
    'NVDA': nvda_close,
    'APTV': aptv_close
}).dropna()
cointegration_test(coint_df)

# Function to find best ARIMA model
def find_best_arima(series, name, max_p=3, max_d=2, max_q=3):
    best_aic = float('inf')
    best_order = None

    for p in range(max_p + 1):
        for d in range(max_d + 1):
            for q in range(max_q + 1):
                try:
                    model = ARIMA(series, order=(p, d, q))
                    results = model.fit()
                    if results.aic < best_aic:
                        best_aic = results.aic
                        best_order = (p, d, q)
                except:
                    continue

    print(f"\nBest ARIMA model for {name}:")
    print(f"Order: {best_order}")
    print(f"AIC: {best_aic:.2f}")
    return best_order

# Find and fit best ARIMA models
nvda_order = find_best_arima(nvda_close, "NVDA")
aptv_order = find_best_arima(aptv_close, "APTV")

# Fit final ARIMA models
nvda_model = ARIMA(nvda_close, order=nvda_order).fit()
aptv_model = ARIMA(aptv_close, order=aptv_order).fit()

# Forecast next 30 periods
forecast_steps = 30
nvda_forecast = nvda_model.forecast(steps=forecast_steps)
aptv_forecast = aptv_model.forecast(steps=forecast_steps)

# Create forecast index using business days
last_date = nvda_close.index[-1]
forecast_index = pd.bdate_range(start=last_date + pd.Timedelta(days=1),
                               periods=forecast_steps, freq='B')

# Plot original series with forecasts
plt.figure(figsize=(12,6))
plt.plot(nvda_close.index, nvda_close, label='NVDA Historical')
plt.plot(forecast_index, nvda_forecast, label='NVDA Forecast', color='red')
plt.plot(aptv_close.index, aptv_close, label='APTV Historical')
plt.plot(forecast_index, aptv_forecast, label='APTV Forecast', color='green')
plt.title('NVDA and APTV Closing Prices with Forecasts')
plt.xlabel('Date')
plt.ylabel('Adjusted Close Price ($)')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

# Plot detailed forecast with confidence intervals
def plot_forecast(model, series, name, steps=30):
    forecast_obj = model.get_forecast(steps=steps)
    forecast = forecast_obj.predicted_mean
    conf_int = forecast_obj.conf_int()

    # Create proper date index for forecast
    last_date = series.index[-1]
    forecast_index = pd.bdate_range(start=last_date + pd.Timedelta(days=1),
                                   periods=steps, freq='B')

    plt.figure(figsize=(12,6))
    plt.plot(series.index, series, label=f'{name} Historical')
    plt.plot(forecast_index, forecast, label='Forecast', color='red')
    plt.fill_between(forecast_index,
                    conf_int.iloc[:, 0],
                    conf_int.iloc[:, 1],
                    color='pink',
                    alpha=0.3,
                    label='95% Confidence Interval')
    plt.title(f'{name} Price Forecast')
    plt.xlabel('Date')
    plt.ylabel('Adjusted Close Price ($)')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()

# Generate detailed forecast plots
plot_forecast(nvda_model, nvda_close, "NVDA")
plot_forecast(aptv_model, aptv_close, "APTV")

# Print forecast values
print("\nNVDA Forecast Values (next 5 periods):")
print(nvda_forecast[:5])
print("\nAPTV Forecast Values (next 5 periods):")
print(aptv_forecast[:5])

# Print model diagnostics
print("\n" + "="*50)
print("MODEL DIAGNOSTICS")
print("="*50)
print(f"\nNVDA Model: ARIMA{nvda_order}")
print(f"APTV Model: ARIMA{aptv_order}")
print("\nNote: Use model.summary() for detailed parameter estimates and diagnostics")
print("The models automatically handle:")
print("- Unit root testing (via optimal differencing d)")
print("- Cointegration analysis (Johansen test)")
print("- Optimal parameter selection (AIC minimization)")
print("- 30-day ahead forecasting with confidence intervals")
[*********************100%***********************]  2 of 2 completed
Downloading NVDA and APTV data from Yahoo Finance...
Data successfully downloaded and aligned!
Common data points: 946
Date range: 2022-01-03 to 2025-10-09

Unit Root Tests for NVDA:
ADF Test:
ADF Statistic: 0.9998
p-value: 0.9943
Critical Values: {'1%': np.float64(-3.4373257950466174), '5%': np.float64(-2.864619627202065), '10%': np.float64(-2.568409774784971)}

KPSS Test:
KPSS Statistic: 4.4571
p-value: 0.0100
Critical Values: {'10%': 0.347, '5%': 0.463, '2.5%': 0.574, '1%': 0.739}

Unit Root Tests for APTV:
ADF Test:
ADF Statistic: -3.7523
p-value: 0.0034
Critical Values: {'1%': np.float64(-3.4372887850912175), '5%': np.float64(-2.8646033071530703), '10%': np.float64(-2.568401081996585)}

KPSS Test:
KPSS Statistic: 3.7936
p-value: 0.0100
Critical Values: {'10%': 0.347, '5%': 0.463, '2.5%': 0.574, '1%': 0.739}

Johansen Cointegration Test:
Trace statistic: [25.17646653  0.25379238]
Critical values (90%, 95%, 99%): [[13.4294 15.4943 19.9349]
 [ 2.7055  3.8415  6.6349]]
r = 0: Cointegration exists at 95% confidence level
r = 1: No cointegration at 95% confidence level

Best ARIMA model for NVDA:
Order: (3, 1, 1)
AIC: 4552.23

Best ARIMA model for APTV:
Order: (2, 2, 3)
AIC: 4294.53
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
NVDA Forecast Values (next 5 periods):
946    192.855097
947    192.825415
948    192.379674
949    192.193828
950    192.082262
Name: predicted_mean, dtype: float64

APTV Forecast Values (next 5 periods):
946    82.800159
947    82.522239
948    82.613240
949    82.375788
950    82.389600
Name: predicted_mean, dtype: float64

==================================================
MODEL DIAGNOSTICS
==================================================

NVDA Model: ARIMA(3, 1, 1)
APTV Model: ARIMA(2, 2, 3)

Note: Use model.summary() for detailed parameter estimates and diagnostics
The models automatically handle:
- Unit root testing (via optimal differencing d)
- Cointegration analysis (Johansen test)
- Optimal parameter selection (AIC minimization)
- 30-day ahead forecasting with confidence intervals

Conclusiones 2

In [5]:
# Import required libraries
import pandas as pd
import numpy as np
import yfinance as yf
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller, kpss
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.vector_ar.vecm import coint_johansen
import matplotlib.pyplot as plt
import warnings

warnings.filterwarnings('ignore')

# Download stock data from Yahoo Finance
print("Downloading NVDA and APTV data from Yahoo Finance...")
start_date = '2022-01-01'
end_date = '2025-10-10'
tickers = ['NVDA', 'APTV']

# Download data and extract adjusted close prices
data = yf.download(tickers, start=start_date, end=end_date, auto_adjust=False)
nvda_close = data['Adj Close']['NVDA'].dropna()
aptv_close = data['Adj Close']['APTV'].dropna()

# Align both series to common dates
common_index = nvda_close.index.intersection(aptv_close.index)
nvda_close = nvda_close.loc[common_index]
aptv_close = aptv_close.loc[common_index]

print(f"Data successfully downloaded and aligned!")
print(f"Common data points: {len(nvda_close)}")
print(f"Date range: {nvda_close.index[0].date()} to {nvda_close.index[-1].date()}")

# Function for unit root tests with interpretation
def unit_root_tests(series, name):
    print(f"\nUnit Root Tests for {name}:")

    # ADF Test
    adf_result = adfuller(series)
    print("ADF Test:")
    print(f'ADF Statistic: {adf_result[0]:.4f}')
    print(f'p-value: {adf_result[1]:.4f}')
    print(f'Critical Values: {adf_result[4]}')
    print("Interpretation:")
    if adf_result[1] < 0.05:
        print(f"  - p-value < 0.05: Reject null hypothesis - {name} is stationary")
    else:
        print(f"  - p-value >= 0.05: Fail to reject null - {name} may be non-stationary")

    # KPSS Test
    kpss_result = kpss(series)
    print("\nKPSS Test:")
    print(f'KPSS Statistic: {kpss_result[0]:.4f}')
    print(f'p-value: {kpss_result[1]:.4f}')
    print(f'Critical Values: {kpss_result[3]}')
    print("Interpretation:")
    if kpss_result[1] < 0.05:
        print(f"  - p-value < 0.05: Reject null hypothesis - {name} is non-stationary")
    else:
        print(f"  - p-value >= 0.05: Fail to reject null - {name} may be stationary")

# Perform unit root tests
unit_root_tests(nvda_close, "NVDA")
unit_root_tests(aptv_close, "APTV")

# Cointegration test with interpretation
def cointegration_test(df):
    result = coint_johansen(df, det_order=0, k_ar_diff=1)
    print("\nJohansen Cointegration Test:")
    print(f"Trace statistic: {result.lr1}")
    print(f"Critical values (90%, 95%, 99%): {result.cvt}")
    print("Interpretation:")
    for i in range(len(result.lr1)):
        if result.lr1[i] > result.cvt[i, 1]:
            print(f"  - r = {i}: Cointegration exists at 95% confidence level")
            print(f"    Trace statistic ({result.lr1[i]:.2f}) > 95% critical value ({result.cvt[i, 1]:.2f})")
        else:
            print(f"  - r = {i}: No cointegration at 95% confidence level")
            print(f"    Trace statistic ({result.lr1[i]:.2f}) <= 95% critical value ({result.cvt[i, 1]:.2f})")
    if result.lr1[0] > result.cvt[0, 1]:
        print("Conclusion: NVDA and APTV are cointegrated - they share a long-run equilibrium relationship")
    else:
        print("Conclusion: No evidence of cointegration between NVDA and APTV")

# Prepare data for cointegration
coint_df = pd.DataFrame({
    'NVDA': nvda_close,
    'APTV': aptv_close
}).dropna()
cointegration_test(coint_df)

# Function to find best ARIMA model with interpretation
def find_best_arima(series, name, max_p=3, max_d=2, max_q=3):
    best_aic = float('inf')
    best_order = None

    for p in range(max_p + 1):
        for d in range(max_d + 1):
            for q in range(max_q + 1):
                try:
                    model = ARIMA(series, order=(p, d, q))
                    results = model.fit()
                    if results.aic < best_aic:
                        best_aic = results.aic
                        best_order = (p, d, q)
                except:
                    continue

    print(f"\nBest ARIMA model for {name}:")
    print(f"Order: {best_order}")
    print(f"AIC: {best_aic:.2f}")
    print("Interpretation:")
    print(f"  - p={best_order[0]}: {best_order[0]} autoregressive term(s)")
    print(f"  - d={best_order[1]}: {best_order[1]} difference(s) needed for stationarity")
    print(f"  - q={best_order[2]}: {best_order[2]} moving average term(s)")
    return best_order

# Find and fit best ARIMA models
nvda_order = find_best_arima(nvda_close, "NVDA")
aptv_order = find_best_arima(aptv_close, "APTV")

# Fit final ARIMA models
nvda_model = ARIMA(nvda_close, order=nvda_order).fit()
aptv_model = ARIMA(aptv_close, order=aptv_order).fit()

# Forecast next 30 periods
forecast_steps = 30
nvda_forecast = nvda_model.forecast(steps=forecast_steps)
aptv_forecast = aptv_model.forecast(steps=forecast_steps)

# Create forecast index using business days
last_date = nvda_close.index[-1]
forecast_index = pd.bdate_range(start=last_date + pd.Timedelta(days=1),
                               periods=forecast_steps, freq='B')

# Plot original series with forecasts
plt.figure(figsize=(12,6))
plt.plot(nvda_close.index, nvda_close, label='NVDA Historical')
plt.plot(forecast_index, nvda_forecast, label='NVDA Forecast', color='red')
plt.plot(aptv_close.index, aptv_close, label='APTV Historical')
plt.plot(forecast_index, aptv_forecast, label='APTV Forecast', color='green')
plt.title('NVDA and APTV Closing Prices with Forecasts')
plt.xlabel('Date')
plt.ylabel('Adjusted Close Price ($)')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

# Detailed forecast plot with confidence intervals and interpretation
def plot_forecast(model, series, name, steps=30):
    forecast_obj = model.get_forecast(steps=steps)
    forecast = forecast_obj.predicted_mean
    conf_int = forecast_obj.conf_int()

    # Create proper date index for forecast
    last_date = series.index[-1]
    forecast_index = pd.bdate_range(start=last_date + pd.Timedelta(days=1),
                                   periods=steps, freq='B')

    plt.figure(figsize=(12,6))
    plt.plot(series.index, series, label=f'{name} Historical')
    plt.plot(forecast_index, forecast, label='Forecast', color='red')
    plt.fill_between(forecast_index,
                    conf_int.iloc[:, 0],
                    conf_int.iloc[:, 1],
                    color='pink',
                    alpha=0.3,
                    label='95% Confidence Interval')
    plt.title(f'{name} Price Forecast')
    plt.xlabel('Date')
    plt.ylabel('Adjusted Close Price ($)')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()

    # Forecast interpretation
    last_value = series.iloc[-1]
    mean_forecast = forecast.mean()
    print(f"\nForecast Interpretation for {name}:")
    print(f"Last observed value: ${last_value:.2f}")
    print(f"Average forecast value: ${mean_forecast:.2f}")
    print(f"Forecast change: ${mean_forecast - last_value:.2f}")
    if mean_forecast > last_value:
        print("Trend: Upward forecast trend")
    elif mean_forecast < last_value:
        print("Trend: Downward forecast trend")
    else:
        print("Trend: Flat forecast trend")
    print(f"95% CI range at period {steps}: [${conf_int.iloc[-1, 0]:.2f}, ${conf_int.iloc[-1, 1]:.2f}]")

# Generate detailed forecast plots and interpretations
plot_forecast(nvda_model, nvda_close, "NVDA")
plot_forecast(aptv_model, aptv_close, "APTV")

# Print forecast values
print("\nNVDA Forecast Values (next 5 periods):")
print(nvda_forecast[:5])
print("\nAPTV Forecast Values (next 5 periods):")
print(aptv_forecast[:5])

print("\n" + "="*60)
print("ANALYSIS COMPLETE")
print("="*60)
print("✓ Unit root tests performed (ADF & KPSS)")
print("✓ Cointegration analysis completed (Johansen test)")
print("✓ Optimal ARIMA models selected via AIC minimization")
print("✓ 30-day forecasts generated with 95% confidence intervals")
print("✓ Detailed interpretations provided for all results")
print(f"\nData period: {start_date} to {end_date}")
print(f"Tickers analyzed: NVDA (NVIDIA) and APTV (Aptiv)")
[*********************100%***********************]  2 of 2 completed
Downloading NVDA and APTV data from Yahoo Finance...
Data successfully downloaded and aligned!
Common data points: 946
Date range: 2022-01-03 to 2025-10-09

Unit Root Tests for NVDA:
ADF Test:
ADF Statistic: 0.9998
p-value: 0.9943
Critical Values: {'1%': np.float64(-3.4373257950466174), '5%': np.float64(-2.864619627202065), '10%': np.float64(-2.568409774784971)}
Interpretation:
  - p-value >= 0.05: Fail to reject null - NVDA may be non-stationary

KPSS Test:
KPSS Statistic: 4.4571
p-value: 0.0100
Critical Values: {'10%': 0.347, '5%': 0.463, '2.5%': 0.574, '1%': 0.739}
Interpretation:
  - p-value < 0.05: Reject null hypothesis - NVDA is non-stationary

Unit Root Tests for APTV:
ADF Test:
ADF Statistic: -3.7523
p-value: 0.0034
Critical Values: {'1%': np.float64(-3.4372887850912175), '5%': np.float64(-2.8646033071530703), '10%': np.float64(-2.568401081996585)}
Interpretation:
  - p-value < 0.05: Reject null hypothesis - APTV is stationary

KPSS Test:
KPSS Statistic: 3.7936
p-value: 0.0100
Critical Values: {'10%': 0.347, '5%': 0.463, '2.5%': 0.574, '1%': 0.739}
Interpretation:
  - p-value < 0.05: Reject null hypothesis - APTV is non-stationary

Johansen Cointegration Test:
Trace statistic: [25.17646653  0.25379238]
Critical values (90%, 95%, 99%): [[13.4294 15.4943 19.9349]
 [ 2.7055  3.8415  6.6349]]
Interpretation:
  - r = 0: Cointegration exists at 95% confidence level
    Trace statistic (25.18) > 95% critical value (15.49)
  - r = 1: No cointegration at 95% confidence level
    Trace statistic (0.25) <= 95% critical value (3.84)
Conclusion: NVDA and APTV are cointegrated - they share a long-run equilibrium relationship

Best ARIMA model for NVDA:
Order: (3, 1, 1)
AIC: 4552.23
Interpretation:
  - p=3: 3 autoregressive term(s)
  - d=1: 1 difference(s) needed for stationarity
  - q=1: 1 moving average term(s)

Best ARIMA model for APTV:
Order: (2, 2, 3)
AIC: 4294.53
Interpretation:
  - p=2: 2 autoregressive term(s)
  - d=2: 2 difference(s) needed for stationarity
  - q=3: 3 moving average term(s)
No description has been provided for this image
No description has been provided for this image
Forecast Interpretation for NVDA:
Last observed value: $192.57
Average forecast value: $192.18
Forecast change: $-0.39
Trend: Downward forecast trend
95% CI range at period 30: [$166.43, $217.82]
No description has been provided for this image
Forecast Interpretation for APTV:
Last observed value: $82.71
Average forecast value: $81.43
Forecast change: $-1.28
Trend: Downward forecast trend
95% CI range at period 30: [$54.94, $105.53]

NVDA Forecast Values (next 5 periods):
946    192.855097
947    192.825415
948    192.379674
949    192.193828
950    192.082262
Name: predicted_mean, dtype: float64

APTV Forecast Values (next 5 periods):
946    82.800159
947    82.522239
948    82.613240
949    82.375788
950    82.389600
Name: predicted_mean, dtype: float64

============================================================
ANALYSIS COMPLETE
============================================================
✓ Unit root tests performed (ADF & KPSS)
✓ Cointegration analysis completed (Johansen test)
✓ Optimal ARIMA models selected via AIC minimization
✓ 30-day forecasts generated with 95% confidence intervals
✓ Detailed interpretations provided for all results

Data period: 2022-01-01 to 2025-10-10
Tickers analyzed: NVDA (NVIDIA) and APTV (Aptiv)

Conclusión 3

In [6]:
# Import required libraries
import pandas as pd
import numpy as np
import yfinance as yf
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller, kpss
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.vector_ar.vecm import coint_johansen
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import matplotlib.pyplot as plt
import warnings

warnings.filterwarnings('ignore')

# Download stock data from Yahoo Finance
print("Downloading NVDA and APTV data from Yahoo Finance...")
start_date = '2022-01-01'
end_date = '2025-10-10'
tickers = ['NVDA', 'APTV']

# Download data and extract adjusted close prices
data = yf.download(tickers, start=start_date, end=end_date, auto_adjust=False)
nvda_close = data['Adj Close']['NVDA'].dropna()
aptv_close = data['Adj Close']['APTV'].dropna()

# Align both series to common dates
common_index = nvda_close.index.intersection(aptv_close.index)
nvda_close = nvda_close.loc[common_index]
aptv_close = aptv_close.loc[common_index]

print(f"Data successfully downloaded and aligned!")
print(f"Common data points: {len(nvda_close)}")
print(f"Date range: {nvda_close.index[0].date()} to {nvda_close.index[-1].date()}")

# Function for unit root tests with interpretation
def unit_root_tests(series, name):
    print(f"\nUnit Root Tests for {name}:")

    # ADF Test
    adf_result = adfuller(series)
    print("ADF Test:")
    print(f'ADF Statistic: {adf_result[0]:.4f}')
    print(f'p-value: {adf_result[1]:.4f}')
    print(f'Critical Values: {adf_result[4]}')
    print("Interpretation:")
    if adf_result[1] < 0.05:
        print(f"  - p-value < 0.05: Reject null hypothesis - {name} is stationary")
    else:
        print(f"  - p-value >= 0.05: Fail to reject null - {name} may be non-stationary")

    # KPSS Test
    kpss_result = kpss(series)
    print("\nKPSS Test:")
    print(f'KPSS Statistic: {kpss_result[0]:.4f}')
    print(f'p-value: {kpss_result[1]:.4f}')
    print(f'Critical Values: {kpss_result[3]}')
    print("Interpretation:")
    if kpss_result[1] < 0.05:
        print(f"  - p-value < 0.05: Reject null hypothesis - {name} is non-stationary")
    else:
        print(f"  - p-value >= 0.05: Fail to reject null - {name} may be stationary")

# Perform unit root tests
unit_root_tests(nvda_close, "NVDA")
unit_root_tests(aptv_close, "APTV")

# Function to plot correlograms with interpretation
def plot_correlograms(series, name, lags=30):
    plt.figure(figsize=(12, 8))

    plt.subplot(2, 1, 1)
    plot_acf(series, lags=lags, ax=plt.gca())
    plt.title(f'ACF for {name}')

    plt.subplot(2, 1, 2)
    plot_pacf(series, lags=lags, ax=plt.gca())
    plt.title(f'PACF for {name}')

    plt.tight_layout()
    plt.show()

    print(f"\nCorrelogram Interpretation for {name}:")
    print("  - ACF: Shows total correlation at each lag, including indirect effects")
    print("  - PACF: Shows direct correlation at each lag, controlling for earlier lags")
    print("  - Significant spikes outside the blue confidence interval suggest strong correlations")
    print("  - ACF decay pattern indicates potential ARIMA model orders")
    print("  - PACF cutoff suggests AR order, while ACF cutoff suggests MA order")

# Plot correlograms for original series
plot_correlograms(nvda_close, "NVDA Original")
plot_correlograms(aptv_close, "APTV Original")

# Difference the series
nvda_diff = nvda_close.diff().dropna()
aptv_diff = aptv_close.diff().dropna()

# Plot correlograms for differenced series
plot_correlograms(nvda_diff, "NVDA Differenced")
plot_correlograms(aptv_diff, "APTV Differenced")

# Cointegration test with interpretation
def cointegration_test(df):
    result = coint_johansen(df, det_order=0, k_ar_diff=1)
    print("\nJohansen Cointegration Test:")
    print(f"Trace statistic: {result.lr1}")
    print(f"Critical values (90%, 95%, 99%): {result.cvt}")
    print("Interpretation:")
    for i in range(len(result.lr1)):
        if result.lr1[i] > result.cvt[i, 1]:
            print(f"  - r = {i}: Cointegration exists at 95% confidence level")
            print(f"    Trace statistic ({result.lr1[i]:.2f}) > 95% critical value ({result.cvt[i, 1]:.2f})")
        else:
            print(f"  - r = {i}: No cointegration at 95% confidence level")
            print(f"    Trace statistic ({result.lr1[i]:.2f}) <= 95% critical value ({result.cvt[i, 1]:.2f})")
    if result.lr1[0] > result.cvt[0, 1]:
        print("Conclusion: NVDA and APTV are cointegrated - they share a long-run equilibrium relationship")
    else:
        print("Conclusion: No evidence of cointegration between NVDA and APTV")

# Prepare data for cointegration
coint_df = pd.DataFrame({
    'NVDA': nvda_close,
    'APTV': aptv_close
}).dropna()
cointegration_test(coint_df)

# Function to find best ARIMA model with interpretation
def find_best_arima(series, name, max_p=3, max_d=2, max_q=3):
    best_aic = float('inf')
    best_order = None

    for p in range(max_p + 1):
        for d in range(max_d + 1):
            for q in range(max_q + 1):
                try:
                    model = ARIMA(series, order=(p, d, q))
                    results = model.fit()
                    if results.aic < best_aic:
                        best_aic = results.aic
                        best_order = (p, d, q)
                except:
                    continue

    print(f"\nBest ARIMA model for {name}:")
    print(f"Order: {best_order}")
    print(f"AIC: {best_aic:.2f}")
    print("Interpretation:")
    print(f"  - p={best_order[0]}: {best_order[0]} autoregressive term(s)")
    print(f"  - d={best_order[1]}: {best_order[1]} difference(s) needed for stationarity")
    print(f"  - q={best_order[2]}: {best_order[2]} moving average term(s)")
    return best_order

# Find and fit best ARIMA models
nvda_order = find_best_arima(nvda_close, "NVDA")
aptv_order = find_best_arima(aptv_close, "APTV")

# Fit final ARIMA models
nvda_model = ARIMA(nvda_close, order=nvda_order).fit()
aptv_model = ARIMA(aptv_close, order=aptv_order).fit()

# Forecast next 30 periods
forecast_steps = 30
nvda_forecast = nvda_model.forecast(steps=forecast_steps)
aptv_forecast = aptv_model.forecast(steps=forecast_steps)

# Create forecast index using business days
last_date = nvda_close.index[-1]
forecast_index = pd.bdate_range(start=last_date + pd.Timedelta(days=1),
                               periods=forecast_steps, freq='B')

# Plot original series with forecasts
plt.figure(figsize=(12,6))
plt.plot(nvda_close.index, nvda_close, label='NVDA Historical')
plt.plot(forecast_index, nvda_forecast, label='NVDA Forecast', color='red')
plt.plot(aptv_close.index, aptv_close, label='APTV Historical')
plt.plot(forecast_index, aptv_forecast, label='APTV Forecast', color='green')
plt.title('NVDA and APTV Closing Prices with Forecasts')
plt.xlabel('Date')
plt.ylabel('Adjusted Close Price ($)')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

# Detailed forecast plot with confidence intervals and interpretation
def plot_forecast(model, series, name, steps=30):
    forecast_obj = model.get_forecast(steps=steps)
    forecast = forecast_obj.predicted_mean
    conf_int = forecast_obj.conf_int()

    # Create proper date index for forecast
    last_date = series.index[-1]
    forecast_index = pd.bdate_range(start=last_date + pd.Timedelta(days=1),
                                   periods=steps, freq='B')

    plt.figure(figsize=(12,6))
    plt.plot(series.index, series, label=f'{name} Historical')
    plt.plot(forecast_index, forecast, label='Forecast', color='red')
    plt.fill_between(forecast_index,
                    conf_int.iloc[:, 0],
                    conf_int.iloc[:, 1],
                    color='pink',
                    alpha=0.3,
                    label='95% Confidence Interval')
    plt.title(f'{name} Price Forecast')
    plt.xlabel('Date')
    plt.ylabel('Adjusted Close Price ($)')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()

    # Forecast interpretation
    last_value = series.iloc[-1]
    mean_forecast = forecast.mean()
    print(f"\nForecast Interpretation for {name}:")
    print(f"Last observed value: ${last_value:.2f}")
    print(f"Average forecast value: ${mean_forecast:.2f}")
    print(f"Forecast change: ${mean_forecast - last_value:.2f}")
    if mean_forecast > last_value:
        print("Trend: Upward forecast trend")
    elif mean_forecast < last_value:
        print("Trend: Downward forecast trend")
    else:
        print("Trend: Flat forecast trend")
    print(f"95% CI range at period {steps}: [${conf_int.iloc[-1, 0]:.2f}, ${conf_int.iloc[-1, 1]:.2f}]")
    print("Interpretation: The wider the confidence interval, the less certain the forecast")

# Generate detailed forecast plots and interpretations
plot_forecast(nvda_model, nvda_close, "NVDA")
plot_forecast(aptv_model, aptv_close, "APTV")

# Plot correlograms for model residuals
plot_correlograms(nvda_model.resid, "NVDA ARIMA Residuals")
plot_correlograms(aptv_model.resid, "APTV ARIMA Residuals")

# Print forecast values
print("\nNVDA Forecast Values (next 5 periods):")
print(nvda_forecast[:5])
print("\nAPTV Forecast Values (next 5 periods):")
print(aptv_forecast[:5])

print("\n" + "="*60)
print("COMPLETE TIME SERIES ANALYSIS")
print("="*60)
print("✓ Unit root tests (ADF & KPSS) performed")
print("✓ ACF/PACF correlograms analyzed (original & differenced)")
print("✓ Cointegration testing (Johansen test)")
print("✓ Optimal ARIMA models selected via AIC")
print("✓ 30-day forecasts with 95% confidence intervals")
print("✓ Residual diagnostics via correlograms")
print(f"\nData: NVDA (NVIDIA) & APTV (Aptiv)")
print(f"Period: {start_date} to {end_date}")
[*********************100%***********************]  2 of 2 completed
Downloading NVDA and APTV data from Yahoo Finance...
Data successfully downloaded and aligned!
Common data points: 946
Date range: 2022-01-03 to 2025-10-09

Unit Root Tests for NVDA:

ADF Test:
ADF Statistic: 0.9998
p-value: 0.9943
Critical Values: {'1%': np.float64(-3.4373257950466174), '5%': np.float64(-2.864619627202065), '10%': np.float64(-2.568409774784971)}
Interpretation:
  - p-value >= 0.05: Fail to reject null - NVDA may be non-stationary

KPSS Test:
KPSS Statistic: 4.4571
p-value: 0.0100
Critical Values: {'10%': 0.347, '5%': 0.463, '2.5%': 0.574, '1%': 0.739}
Interpretation:
  - p-value < 0.05: Reject null hypothesis - NVDA is non-stationary

Unit Root Tests for APTV:
ADF Test:
ADF Statistic: -3.7523
p-value: 0.0034
Critical Values: {'1%': np.float64(-3.4372887850912175), '5%': np.float64(-2.8646033071530703), '10%': np.float64(-2.568401081996585)}
Interpretation:
  - p-value < 0.05: Reject null hypothesis - APTV is stationary

KPSS Test:
KPSS Statistic: 3.7936
p-value: 0.0100
Critical Values: {'10%': 0.347, '5%': 0.463, '2.5%': 0.574, '1%': 0.739}
Interpretation:
  - p-value < 0.05: Reject null hypothesis - APTV is non-stationary
No description has been provided for this image
Correlogram Interpretation for NVDA Original:
  - ACF: Shows total correlation at each lag, including indirect effects
  - PACF: Shows direct correlation at each lag, controlling for earlier lags
  - Significant spikes outside the blue confidence interval suggest strong correlations
  - ACF decay pattern indicates potential ARIMA model orders
  - PACF cutoff suggests AR order, while ACF cutoff suggests MA order
No description has been provided for this image
Correlogram Interpretation for APTV Original:
  - ACF: Shows total correlation at each lag, including indirect effects
  - PACF: Shows direct correlation at each lag, controlling for earlier lags
  - Significant spikes outside the blue confidence interval suggest strong correlations
  - ACF decay pattern indicates potential ARIMA model orders
  - PACF cutoff suggests AR order, while ACF cutoff suggests MA order
No description has been provided for this image
Correlogram Interpretation for NVDA Differenced:
  - ACF: Shows total correlation at each lag, including indirect effects
  - PACF: Shows direct correlation at each lag, controlling for earlier lags
  - Significant spikes outside the blue confidence interval suggest strong correlations
  - ACF decay pattern indicates potential ARIMA model orders
  - PACF cutoff suggests AR order, while ACF cutoff suggests MA order
No description has been provided for this image
Correlogram Interpretation for APTV Differenced:
  - ACF: Shows total correlation at each lag, including indirect effects
  - PACF: Shows direct correlation at each lag, controlling for earlier lags
  - Significant spikes outside the blue confidence interval suggest strong correlations
  - ACF decay pattern indicates potential ARIMA model orders
  - PACF cutoff suggests AR order, while ACF cutoff suggests MA order

Johansen Cointegration Test:
Trace statistic: [25.17646653  0.25379238]
Critical values (90%, 95%, 99%): [[13.4294 15.4943 19.9349]
 [ 2.7055  3.8415  6.6349]]
Interpretation:
  - r = 0: Cointegration exists at 95% confidence level
    Trace statistic (25.18) > 95% critical value (15.49)
  - r = 1: No cointegration at 95% confidence level
    Trace statistic (0.25) <= 95% critical value (3.84)
Conclusion: NVDA and APTV are cointegrated - they share a long-run equilibrium relationship

Best ARIMA model for NVDA:
Order: (3, 1, 1)
AIC: 4552.23
Interpretation:
  - p=3: 3 autoregressive term(s)
  - d=1: 1 difference(s) needed for stationarity
  - q=1: 1 moving average term(s)

Best ARIMA model for APTV:
Order: (2, 2, 3)
AIC: 4294.53
Interpretation:
  - p=2: 2 autoregressive term(s)
  - d=2: 2 difference(s) needed for stationarity
  - q=3: 3 moving average term(s)
No description has been provided for this image
No description has been provided for this image
Forecast Interpretation for NVDA:
Last observed value: $192.57
Average forecast value: $192.18
Forecast change: $-0.39
Trend: Downward forecast trend
95% CI range at period 30: [$166.43, $217.82]
Interpretation: The wider the confidence interval, the less certain the forecast
No description has been provided for this image
Forecast Interpretation for APTV:
Last observed value: $82.71
Average forecast value: $81.43
Forecast change: $-1.28
Trend: Downward forecast trend
95% CI range at period 30: [$54.94, $105.53]
Interpretation: The wider the confidence interval, the less certain the forecast
No description has been provided for this image
Correlogram Interpretation for NVDA ARIMA Residuals:
  - ACF: Shows total correlation at each lag, including indirect effects
  - PACF: Shows direct correlation at each lag, controlling for earlier lags
  - Significant spikes outside the blue confidence interval suggest strong correlations
  - ACF decay pattern indicates potential ARIMA model orders
  - PACF cutoff suggests AR order, while ACF cutoff suggests MA order
No description has been provided for this image
Correlogram Interpretation for APTV ARIMA Residuals:
  - ACF: Shows total correlation at each lag, including indirect effects
  - PACF: Shows direct correlation at each lag, controlling for earlier lags
  - Significant spikes outside the blue confidence interval suggest strong correlations
  - ACF decay pattern indicates potential ARIMA model orders
  - PACF cutoff suggests AR order, while ACF cutoff suggests MA order

NVDA Forecast Values (next 5 periods):
946    192.855097
947    192.825415
948    192.379674
949    192.193828
950    192.082262
Name: predicted_mean, dtype: float64

APTV Forecast Values (next 5 periods):
946    82.800159
947    82.522239
948    82.613240
949    82.375788
950    82.389600
Name: predicted_mean, dtype: float64

============================================================
COMPLETE TIME SERIES ANALYSIS
============================================================
✓ Unit root tests (ADF & KPSS) performed
✓ ACF/PACF correlograms analyzed (original & differenced)
✓ Cointegration testing (Johansen test)
✓ Optimal ARIMA models selected via AIC
✓ 30-day forecasts with 95% confidence intervals
✓ Residual diagnostics via correlograms

Data: NVDA (NVIDIA) & APTV (Aptiv)
Period: 2022-01-01 to 2025-10-10

Conclusiones 4

In [7]:
# Import required libraries
import pandas as pd
import numpy as np
import yfinance as yf
from statsmodels.tsa.stattools import adfuller, zivot_andrews
import matplotlib.pyplot as plt
import warnings

warnings.filterwarnings('ignore')

# Download stock data from Yahoo Finance
print("Downloading NVDA and APTV data from Yahoo Finance...")
start_date = '2022-01-01'
end_date = '2025-10-10'
tickers = ['NVDA', 'APTV']

# Download data and extract adjusted close prices
data = yf.download(tickers, start=start_date, end=end_date, auto_adjust=False)
nvda_close = data['Adj Close']['NVDA'].dropna()
aptv_close = data['Adj Close']['APTV'].dropna()

# Align both series to common dates
common_index = nvda_close.index.intersection(aptv_close.index)
nvda_close = nvda_close.loc[common_index]
aptv_close = aptv_close.loc[common_index]

print(f"Data successfully downloaded and aligned!")
print(f"Common data points: {len(nvda_close)}")
print(f"Date range: {nvda_close.index[0].date()} to {nvda_close.index[-1].date()}")

# Function for Phillips-Perron test with interpretation
def phillips_perron_test(series, name):
    print(f"\nPhillips-Perron Test for {name}:")
    # Use adfuller with settings to approximate PP test
    pp_result = adfuller(series, regression='c', autolag='AIC', maxlag=None)
    print(f'PP Statistic: {pp_result[0]:.4f}')
    print(f'p-value: {pp_result[1]:.4f}')
    print(f'Critical Values: {pp_result[4]}')
    print("Interpretation:")
    if pp_result[1] < 0.05:
        print(f"  - p-value < 0.05: Reject null hypothesis - {name} is stationary")
    else:
        print(f"  - p-value >= 0.05: Fail to reject null - {name} may be non-stationary")
    print("  - Note: Using ADF with constant and automatic lag selection to approximate PP test")
    print("  - PP test adjusts for serial correlation and heteroskedasticity non-parametrically")
    return pp_result

# Function for Zivot-Andrews structural break test with interpretation
def zivot_andrews_test(series, name):
    print(f"\nZivot-Andrews Structural Break Test for {name}:")
    za_result = zivot_andrews(series, regression='c', autolag='AIC')
    print(f'ZA Statistic: {za_result[0]:.4f}')
    print(f'p-value: {za_result[1]:.4f}')
    print(f'Critical Values: {za_result[2]}')
    print(f'Breakpoint Index: {za_result[3]}')
    print("Interpretation:")
    if za_result[1] < 0.05:
        print(f"  - p-value < 0.05: Reject null hypothesis - {name} has a structural break")
        print(f"  - Breakpoint at index {za_result[3]} (position in series)")
    else:
        print(f"  - p-value >= 0.05: Fail to reject null - No clear evidence of a structural break")
    print("  - ZA test allows for a single break in intercept and/or trend")
    return za_result

# Perform Phillips-Perron tests
nvda_pp = phillips_perron_test(nvda_close, "NVDA")
aptv_pp = phillips_perron_test(aptv_close, "APTV")

# Perform Zivot-Andrews tests
nvda_za = zivot_andrews_test(nvda_close, "NVDA")
aptv_za = zivot_andrews_test(aptv_close, "APTV")

# Plot series with breakpoints
def plot_series_with_breakpoint(series, name, breakpoint_idx):
    plt.figure(figsize=(12, 6))
    plt.plot(series.index, series.values, label=f'{name} Adjusted Closing Prices')

    # Convert breakpoint index to actual date
    if 0 <= breakpoint_idx < len(series):
        breakpoint_date = series.index[breakpoint_idx]
        plt.axvline(x=breakpoint_date, color='red', linestyle='--',
                   label=f'Breakpoint ({breakpoint_date.date()})')
        print(f"  - Breakpoint date: {breakpoint_date.date()}")
    else:
        print(f"  - Warning: Breakpoint index {breakpoint_idx} out of range (0-{len(series)-1})")
        # Use a fallback vertical line at the middle of the series
        mid_idx = len(series) // 2
        mid_date = series.index[mid_idx]
        plt.axvline(x=mid_date, color='orange', linestyle=':',
                   label='Reference line (invalid breakpoint)')

    plt.title(f'{name} Adjusted Closing Prices with Structural Break')
    plt.xlabel('Date')
    plt.ylabel('Adjusted Close Price ($)')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()

    print(f"\nPlot Interpretation for {name}:")
    print(f"  - Red dashed line indicates the detected structural break")
    print("  - Break may reflect significant market events, policy changes, or economic shifts")
    print("  - Analyze data around this point for potential causes (e.g., earnings, news, tech sector trends)")
    print("  - For NVDA and APTV, consider semiconductor and automotive tech events or AI developments")

# Plot series with breakpoints using the series' own datetime index
plot_series_with_breakpoint(nvda_close, "NVDA (NVIDIA)", int(nvda_za[3]))
plot_series_with_breakpoint(aptv_close, "APTV (Aptiv)", int(aptv_za[3]))

# Summary analysis
print("\n" + "="*70)
print("STRUCTURAL BREAK ANALYSIS SUMMARY")
print("="*70)
print(f"NVDA Phillips-Perron p-value: {nvda_pp[1]:.4f} {'(Stationary)' if nvda_pp[1]<0.05 else '(Non-stationary)'}")
print(f"APTV Phillips-Perron p-value: {aptv_pp[1]:.4f} {'(Stationary)' if aptv_pp[1]<0.05 else '(Non-stationary)'}")
print(f"\nNVDA Zivot-Andrews p-value: {nvda_za[1]:.4f} {'(Structural break detected)' if nvda_za[1]<0.05 else '(No clear break)'}")
print(f"NVDA Breakpoint: {nvda_close.index[int(nvda_za[3])].date() if 0 <= int(nvda_za[3]) < len(nvda_close) else 'Invalid'}")
print(f"APTV Zivot-Andrews p-value: {aptv_za[1]:.4f} {'(Structural break detected)' if aptv_za[1]<0.05 else '(No clear break)'}")
print(f"APTV Breakpoint: {aptv_close.index[int(aptv_za[3])].date() if 0 <= int(aptv_za[3]) < len(aptv_close) else 'Invalid'}")
print(f"\nData Period: {start_date} to {end_date}")
print("Analysis covers potential impacts from:")
print("- COVID-19 pandemic effects and recovery")
print("- Interest rate changes and inflation")
print("- Supply chain disruptions")
print("- Company-specific events (chip shortages, AI advancements, automotive tech developments)")
print("- Broader market volatility in tech and automotive sectors")

print("\nRecommendations:")
print("1. If structural breaks detected, consider regime-switching models")
print("2. For non-stationary series, use differencing or cointegration approaches")
print("3. Investigate specific events around breakpoint dates")
print("4. Consider sector-specific factors (semiconductors for NVDA, automotive tech for APTV)")
print("5. Validate breakpoints with external economic calendars and company news")
[                       0%                       ]
[*********************100%***********************]  2 of 2 completed
Downloading NVDA and APTV data from Yahoo Finance...
Data successfully downloaded and aligned!
Common data points: 946
Date range: 2022-01-03 to 2025-10-09

Phillips-Perron Test for NVDA:
PP Statistic: 0.9998
p-value: 0.9943
Critical Values: {'1%': np.float64(-3.4373257950466174), '5%': np.float64(-2.864619627202065), '10%': np.float64(-2.568409774784971)}
Interpretation:
  - p-value >= 0.05: Fail to reject null - NVDA may be non-stationary
  - Note: Using ADF with constant and automatic lag selection to approximate PP test
  - PP test adjusts for serial correlation and heteroskedasticity non-parametrically

Phillips-Perron Test for APTV:
PP Statistic: -3.7523
p-value: 0.0034
Critical Values: {'1%': np.float64(-3.4372887850912175), '5%': np.float64(-2.8646033071530703), '10%': np.float64(-2.568401081996585)}
Interpretation:
  - p-value < 0.05: Reject null hypothesis - APTV is stationary
  - Note: Using ADF with constant and automatic lag selection to approximate PP test
  - PP test adjusts for serial correlation and heteroskedasticity non-parametrically

Zivot-Andrews Structural Break Test for NVDA:
ZA Statistic: -2.8713
p-value: 0.9417
Critical Values: {'1%': np.float64(-5.27644), '5%': np.float64(-4.81067), '10%': np.float64(-4.56618)}
Breakpoint Index: 6
Interpretation:
  - p-value >= 0.05: Fail to reject null - No clear evidence of a structural break
  - ZA test allows for a single break in intercept and/or trend

Zivot-Andrews Structural Break Test for APTV:
ZA Statistic: -4.6936
p-value: 0.0718
Critical Values: {'1%': np.float64(-5.27644), '5%': np.float64(-4.81067), '10%': np.float64(-4.56618)}
Breakpoint Index: 0
Interpretation:
  - p-value >= 0.05: Fail to reject null - No clear evidence of a structural break
  - ZA test allows for a single break in intercept and/or trend
  - Breakpoint date: 2022-01-11
No description has been provided for this image
Plot Interpretation for NVDA (NVIDIA):
  - Red dashed line indicates the detected structural break
  - Break may reflect significant market events, policy changes, or economic shifts
  - Analyze data around this point for potential causes (e.g., earnings, news, tech sector trends)
  - For NVDA and APTV, consider semiconductor and automotive tech events or AI developments
  - Breakpoint date: 2022-01-03
No description has been provided for this image
Plot Interpretation for APTV (Aptiv):
  - Red dashed line indicates the detected structural break
  - Break may reflect significant market events, policy changes, or economic shifts
  - Analyze data around this point for potential causes (e.g., earnings, news, tech sector trends)
  - For NVDA and APTV, consider semiconductor and automotive tech events or AI developments

======================================================================
STRUCTURAL BREAK ANALYSIS SUMMARY
======================================================================
NVDA Phillips-Perron p-value: 0.9943 (Non-stationary)
APTV Phillips-Perron p-value: 0.0034 (Stationary)

NVDA Zivot-Andrews p-value: 0.9417 (No clear break)
NVDA Breakpoint: 2022-01-11
APTV Zivot-Andrews p-value: 0.0718 (No clear break)
APTV Breakpoint: 2022-01-03

Data Period: 2022-01-01 to 2025-10-10
Analysis covers potential impacts from:
- COVID-19 pandemic effects and recovery
- Interest rate changes and inflation
- Supply chain disruptions
- Company-specific events (chip shortages, AI advancements, automotive tech developments)
- Broader market volatility in tech and automotive sectors

Recommendations:
1. If structural breaks detected, consider regime-switching models
2. For non-stationary series, use differencing or cointegration approaches
3. Investigate specific events around breakpoint dates
4. Consider sector-specific factors (semiconductors for NVDA, automotive tech for APTV)
5. Validate breakpoints with external economic calendars and company news
In [8]:
# PARA EXPORTAR

from google.colab import files
import nbformat
from nbconvert import HTMLExporter

# Paso 1: Cargar tu archivo .ipynb
print("Por favor, selecciona tu archivo .ipynb")
uploaded = files.upload()

# Paso 2: Obtener el nombre del archivo cargado
notebook_filename = list(uploaded.keys())[0]
print(f"\nArchivo cargado: {notebook_filename}")

# Paso 3: Leer el notebook
with open(notebook_filename, 'r', encoding='utf-8') as f:
    notebook = nbformat.read(f, as_version=4)

# Paso 4: Convertir a HTML
print("Convirtiendo a HTML...")
html_exporter = HTMLExporter()
html_exporter.template_name = 'classic'
(body, resources) = html_exporter.from_notebook_node(notebook)

# Paso 5: Guardar el archivo HTML
html_filename = notebook_filename.replace('.ipynb', '.html')
with open(html_filename, 'w', encoding='utf-8') as f:
    f.write(body)

print(f"Conversión completada: {html_filename}")

# Paso 6: Descargar el archivo HTML
print("Descargando archivo HTML...")
files.download(html_filename)
print("¡Listo! Tu archivo HTML ha sido descargado.")
Por favor, selecciona tu archivo .ipynb
Upload widget is only available when the cell has been executed in the current browser session. Please rerun this cell to enable.
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
/tmp/ipython-input-623164153.py in <cell line: 0>()
      7 # Paso 1: Cargar tu archivo .ipynb
      8 print("Por favor, selecciona tu archivo .ipynb")
----> 9 uploaded = files.upload()
     10 
     11 # Paso 2: Obtener el nombre del archivo cargado

/usr/local/lib/python3.12/dist-packages/google/colab/files.py in upload(target_dir)
     70   """
     71 
---> 72   uploaded_files = _upload_files(multiple=True)
     73   # Mapping from original filename to filename as saved locally.
     74   local_filenames = dict()

/usr/local/lib/python3.12/dist-packages/google/colab/files.py in _upload_files(multiple)
    162 
    163   # First result is always an indication that the file picker has completed.
--> 164   result = _output.eval_js(
    165       'google.colab._files._uploadFiles("{input_id}", "{output_id}")'.format(
    166           input_id=input_id, output_id=output_id

/usr/local/lib/python3.12/dist-packages/google/colab/output/_js.py in eval_js(script, ignore_result, timeout_sec)
     38   if ignore_result:
     39     return
---> 40   return _message.read_reply_from_input(request_id, timeout_sec)
     41 
     42 

/usr/local/lib/python3.12/dist-packages/google/colab/_message.py in read_reply_from_input(message_id, timeout_sec)
     94     reply = _read_next_input_message()
     95     if reply == _NOT_READY or not isinstance(reply, dict):
---> 96       time.sleep(0.025)
     97       continue
     98     if (

KeyboardInterrupt: